+// MUSTACHE-GLOBAL-INDEX:

Global Namespace

+// MUSTACHE-GLOBAL-INDEX:

Namespaces

+// MUSTACHE-GLOBAL-INDEX:
  • @nonymous_namespace
  • +// MUSTACHE-GLOBAL-INDEX:
  • AnotherNamespace
  • +// MUSTACHE-GLOBAL-INDEX:
  • PrimaryNamespace
  • + // MD-GLOBAL-INDEX: # Global Namespace // MD-GLOBAL-INDEX: ## Namespaces // MD-GLOBAL-INDEX: * [@nonymous_namespace](..{{[\/]}}@nonymous_namespace{{[\/]}}index.md) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index dfef341c0e257..3b67ee3819507 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -734,6 +734,8 @@ Bug Fixes in This Version - Accept empty enumerations in MSVC-compatible C mode. (#GH114402) - Fix a bug leading to incorrect code generation with complex number compound assignment and bitfield values, which also caused a crash with UBsan. (#GH166798) - Fixed false-positive shadow diagnostics for lambdas in explicit object member functions. (#GH163731) +- Fix an assertion failure when a ``target_clones`` attribute is only on the + forward declaration of a multiversioned function. (#GH165517) (#GH129483) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index c99fd6f0bfcc4..7321bf4ea8963 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -240,7 +240,6 @@ struct MissingFeatures { static bool ctorConstLvalueToRvalueConversion() { return false; } static bool ctorMemcpyizer() { return false; } static bool cudaSupport() { return false; } - static bool cxxRecordStaticMembers() { return false; } static bool dataLayoutTypeIsSized() { return false; } static bool dataLayoutTypeAllocSize() { return false; } static bool dataLayoutTypeStoreSize() { return false; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 555aa5c050ffd..591457b1d66b4 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1790,7 +1790,9 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS, else OS << *ND; } else if (const auto *RD = dyn_cast(DC)) { - if (!RD->getIdentifier()) + if (TypedefNameDecl *TD = RD->getTypedefNameForAnonDecl()) + OS 
<< *TD; + else if (!RD->getIdentifier()) OS << "(anonymous " << RD->getKindName() << ')'; else OS << *RD; diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp index 40888e7326659..41a193e4d85c5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp @@ -19,12 +19,9 @@ using namespace clang::CIRGen; namespace { struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { - SourceRange declareRange; mlir::acc::DeclareEnterOp enterOp; - OpenACCDeclareCleanup(SourceRange declareRange, - mlir::acc::DeclareEnterOp enterOp) - : declareRange(declareRange), enterOp(enterOp) {} + OpenACCDeclareCleanup(mlir::acc::DeclareEnterOp enterOp) : enterOp(enterOp) {} template void createOutOp(CIRGenFunction &cgf, InTy inOp) { @@ -55,8 +52,8 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { if (auto copyin = val.getDefiningOp()) { switch (copyin.getDataClause()) { default: - cgf.cgm.errorNYI(declareRange, - "OpenACC local declare clause copyin cleanup"); + llvm_unreachable( + "OpenACC local declare clause copyin unexpected data clause"); break; case mlir::acc::DataClause::acc_copy: createOutOp(cgf, copyin); @@ -65,6 +62,24 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { createOutOp(cgf, copyin); break; } + } else if (auto create = val.getDefiningOp()) { + switch (create.getDataClause()) { + default: + llvm_unreachable( + "OpenACC local declare clause create unexpected data clause"); + break; + case mlir::acc::DataClause::acc_copyout: + createOutOp(cgf, create); + break; + case mlir::acc::DataClause::acc_create: + createOutOp(cgf, create); + break; + } + } else if (auto present = val.getDefiningOp()) { + createOutOp(cgf, present); + } else if (auto dev_res = + val.getDefiningOp()) { + createOutOp(cgf, dev_res); } else if (val.getDefiningOp()) { // Link has no exit clauses, and shouldn't be copied. 
continue; @@ -72,7 +87,7 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { // DevicePtr has no exit clauses, and shouldn't be copied. continue; } else { - cgf.cgm.errorNYI(declareRange, "OpenACC local declare clause cleanup"); + llvm_unreachable("OpenACC local declare clause unexpected defining op"); continue; } exitOp.getDataClauseOperandsMutable().append(val); @@ -91,7 +106,7 @@ void CIRGenFunction::emitOpenACCDeclare(const OpenACCDeclareDecl &d) { d.clauses()); ehStack.pushCleanup(CleanupKind::NormalCleanup, - d.getSourceRange(), enterOp); + enterOp); } void CIRGenFunction::emitOpenACCRoutine(const OpenACCRoutineDecl &d) { diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index e09d3de5aac9a..251c99c8cd45b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1556,10 +1556,14 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) { break; case Decl::ClassTemplateSpecialization: - case Decl::CXXRecord: + case Decl::CXXRecord: { + CXXRecordDecl *crd = cast(decl); assert(!cir::MissingFeatures::generateDebugInfo()); - assert(!cir::MissingFeatures::cxxRecordStaticMembers()); + for (auto *childDecl : crd->decls()) + if (isa(childDecl)) + emitTopLevelDecl(childDecl); break; + } case Decl::FileScopeAsm: // File-scope asm is ignored during device-side CUDA compilation. 
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index 1e7a332d1dc22..60a089fe0e936 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -853,12 +853,16 @@ class OpenACCClauseCIREmitter final var, mlir::acc::DataClause::acc_copyout, clause.getModifierList(), /*structured=*/false, /*implicit=*/false); + } else if constexpr (isOneOfTypes) { + for (const Expr *var : clause.getVarList()) + addDataOperand( + var, mlir::acc::DataClause::acc_copyout, clause.getModifierList(), + /*structured=*/true, + /*implicit=*/false); } else if constexpr (isCombinedType) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitCopyOutClause"); } } @@ -875,12 +879,16 @@ class OpenACCClauseCIREmitter final addDataOperand( var, mlir::acc::DataClause::acc_create, clause.getModifierList(), /*structured=*/false, /*implicit=*/false); + } else if constexpr (isOneOfTypes) { + for (const Expr *var : clause.getVarList()) + addDataOperand( + var, mlir::acc::DataClause::acc_create, clause.getModifierList(), + /*structured=*/true, + /*implicit=*/false); } else if constexpr (isCombinedType) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare construct remains. 
- return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitCreateClause"); } } @@ -976,12 +984,16 @@ class OpenACCClauseCIREmitter final addDataOperand( var, mlir::acc::DataClause::acc_present, {}, /*structured=*/true, /*implicit=*/false); + } else if constexpr (isOneOfTypes) { + for (const Expr *var : clause.getVarList()) + addDataOperand( + var, mlir::acc::DataClause::acc_present, {}, + /*structured=*/true, + /*implicit=*/false); } else if constexpr (isCombinedType) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitPresentClause"); } } @@ -1123,6 +1135,18 @@ class OpenACCClauseCIREmitter final llvm_unreachable("Unknown construct kind in VisitReductionClause"); } } + + void VisitDeviceResidentClause(const OpenACCDeviceResidentClause &clause) { + if constexpr (isOneOfTypes) { + for (const Expr *var : clause.getVarList()) + addDataOperand( + var, mlir::acc::DataClause::acc_declare_device_resident, {}, + /*structured=*/true, + /*implicit=*/false); + } else { + llvm_unreachable("Unknown construct kind in VisitDeviceResidentClause"); + } + } }; template diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 5ecf68db18858..f7a1b78dad95d 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11996,6 +11996,16 @@ static bool CheckMultiVersionAdditionalDecl( } } + // Redeclarations of a target_clones function may omit the attribute, in which + // case it will be inherited during declaration merging. + if (NewMVKind == MultiVersionKind::None && + OldMVKind == MultiVersionKind::TargetClones) { + NewFD->setIsMultiVersion(); + Redeclaration = true; + OldDecl = OldFD; + return false; + } + // Else, this is simply a non-redecl case. 
Checking the 'value' is only // necessary in the Target case, since The CPUSpecific/Dispatch cases are // handled in the attribute adding step. @@ -12119,8 +12129,9 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD, } // At this point, we have a multiversion function decl (in OldFD) AND an - // appropriate attribute in the current function decl. Resolve that these are - // still compatible with previous declarations. + // appropriate attribute in the current function decl (unless it's allowed to + // omit the attribute). Resolve that these are still compatible with previous + // declarations. return CheckMultiVersionAdditionalDecl(S, OldFD, NewFD, NewCPUDisp, NewCPUSpec, NewClones, Redeclaration, OldDecl, Previous); diff --git a/clang/test/CIR/CodeGen/static-members.cpp b/clang/test/CIR/CodeGen/static-members.cpp new file mode 100644 index 0000000000000..8722dc2a2bc6f --- /dev/null +++ b/clang/test/CIR/CodeGen/static-members.cpp @@ -0,0 +1,94 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck %s -check-prefix=CIR --input-file=%t.cir +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck %s -check-prefix=LLVM --input-file=%t-cir.ll +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck %s -check-prefix=OGCG --input-file=%t.ll + +struct HasDtor { + ~HasDtor(); +}; +struct S { + static inline HasDtor hd; +}; + +// CIR: cir.global linkonce_odr comdat @_ZN1S2hdE = #cir.zero : !rec_HasDtor + +// CIR: cir.func internal private @__cxx_global_var_init() { +// CIR: %[[HD:.*]] = cir.get_global @_ZN1S2hdE : !cir.ptr +// CIR: %[[DTOR:.*]] = cir.get_global @_ZN7HasDtorD1Ev : !cir.ptr)>> +// CIR: %[[DTOR_CAST:.*]] = cir.cast bitcast %[[DTOR]] : !cir.ptr)>> -> !cir.ptr)>> +// CIR: %[[HD_CAST:.*]] = cir.cast bitcast %[[HD]] : !cir.ptr -> !cir.ptr +// CIR: %[[HANDLE:.*]] = cir.get_global @__dso_handle : !cir.ptr 
+// CIR: cir.call @__cxa_atexit(%[[DTOR_CAST]], %[[HD_CAST]], %[[HANDLE]]) + +// LLVM: @_ZN1S2hdE = linkonce_odr global %struct.HasDtor zeroinitializer, comdat +// LLVM: @_ZN5Outer5Inner2hdE = linkonce_odr global %struct.HasDtor zeroinitializer, comdat + +// LLVM: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_static_members.cpp, ptr null }] +// LLVM: define internal void @__cxx_global_var_init() +// LLVM: call void @__cxa_atexit(ptr @_ZN7HasDtorD1Ev, ptr @_ZN1S2hdE, ptr @__dso_handle) + +// FIXME(cir): OGCG has a guard variable for this case that we don't generate in CIR. +// This is needed because the variable linkonce_odr linkage. + +// OGCG: @_ZN1S2hdE = linkonce_odr global %struct.HasDtor zeroinitializer, comdat +// OGCG: @_ZGVN1S2hdE = linkonce_odr global i64 0, comdat($_ZN1S2hdE) +// OGCG: @_ZN5Outer5Inner2hdE = linkonce_odr global %struct.HasDtor zeroinitializer, comdat +// OGCG: @_ZGVN5Outer5Inner2hdE = linkonce_odr global i64 0, comdat($_ZN5Outer5Inner2hdE) +// OGCG: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [ +// OGCG-SAME: { i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr @_ZN1S2hdE }, +// OGCG-SAME: { i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init.1, ptr @_ZN5Outer5Inner2hdE }] + +// OGCG: define internal void @__cxx_global_var_init() {{.*}} section ".text.startup" comdat($_ZN1S2hdE) { +// OGCG: %[[GUARD:.*]] = load atomic i8, ptr @_ZGVN1S2hdE acquire +// OGCG: %[[UNINIT:.*]] = icmp eq i8 %[[GUARD]], 0 +// OGCG: br i1 %[[UNINIT]], label %[[INIT_CHECK:.*]], label %[[INIT_END:.*]] +// OGCG: [[INIT_CHECK:.*]]: +// OGCG: %[[GUARD_ACQUIRE:.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVN1S2hdE) +// OGCG: %[[TOBOOL:.*]] = icmp ne i32 %[[GUARD_ACQUIRE]], 0 +// OGCG: br i1 %[[TOBOOL]], label %[[INIT:.*]], label %[[INIT_END]] +// OGCG: [[INIT:.*]]: +// OGCG: %[[ATEXIT:.*]] = call i32 @__cxa_atexit(ptr @_ZN7HasDtorD1Ev, ptr @_ZN1S2hdE, ptr @__dso_handle) 
+// OGCG: call void @__cxa_guard_release(ptr @_ZGVN1S2hdE) +// OGCG: br label %[[INIT_END]] +// OGCG: [[INIT_END]]: + +struct Outer { + struct Inner { + static inline HasDtor hd; + }; +}; + +// CIR: cir.global linkonce_odr comdat @_ZN5Outer5Inner2hdE = #cir.zero : !rec_HasDtor +// CIR: cir.func internal private @__cxx_global_var_init.1() +// CIR: %[[HD:.*]] = cir.get_global @_ZN5Outer5Inner2hdE : !cir.ptr +// CIR: %[[DTOR:.*]] = cir.get_global @_ZN7HasDtorD1Ev : !cir.ptr)>> +// CIR: %[[DTOR_CAST:.*]] = cir.cast bitcast %[[DTOR]] : !cir.ptr)>> -> !cir.ptr)>> +// CIR: %[[HD_CAST:.*]] = cir.cast bitcast %[[HD]] : !cir.ptr -> !cir.ptr +// CIR: %[[HANDLE:.*]] = cir.get_global @__dso_handle : !cir.ptr +// CIR: cir.call @__cxa_atexit(%[[DTOR_CAST]], %[[HD_CAST]], %[[HANDLE]]) : (!cir.ptr)>>, !cir.ptr, !cir.ptr) -> () + +// LLVM: define internal void @__cxx_global_var_init.1() +// LLVM: call void @__cxa_atexit(ptr @_ZN7HasDtorD1Ev, ptr @_ZN5Outer5Inner2hdE, ptr @__dso_handle) + +// OGCG: define internal void @__cxx_global_var_init.1() {{.*}} section ".text.startup" comdat($_ZN5Outer5Inner2hdE) { +// OGCG: %[[GUARD:.*]] = load atomic i8, ptr @_ZGVN5Outer5Inner2hdE acquire +// OGCG: %[[UNINIT:.*]] = icmp eq i8 %[[GUARD]], 0 +// OGCG: br i1 %[[UNINIT]], label %[[INIT_CHECK:.*]], label %[[INIT_END:.*]] +// OGCG: [[INIT_CHECK:.*]]: +// OGCG: %[[GUARD_ACQUIRE:.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVN5Outer5Inner2hdE) +// OGCG: %[[TOBOOL:.*]] = icmp ne i32 %[[GUARD_ACQUIRE]], 0 +// OGCG: br i1 %[[TOBOOL]], label %[[INIT:.*]], label %[[INIT_END]] +// OGCG: [[INIT:.*]]: +// OGCG: %[[ATEXIT:.*]] = call i32 @__cxa_atexit(ptr @_ZN7HasDtorD1Ev, ptr @_ZN5Outer5Inner2hdE, ptr @__dso_handle) +// OGCG: call void @__cxa_guard_release(ptr @_ZGVN5Outer5Inner2hdE) +// OGCG: br label %[[INIT_END]] +// OGCG: [[INIT_END]]: + + +// CIR: cir.func private @_GLOBAL__sub_I_static_members.cpp() +// CIR: cir.call @__cxx_global_var_init() + +// LLVM: define void @_GLOBAL__sub_I_static_members.cpp() 
+// LLVM: call void @__cxx_global_var_init() diff --git a/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp b/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp new file mode 100644 index 0000000000000..1d79cef894d5e --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct HasSideEffects { + HasSideEffects(); + ~HasSideEffects(); +}; + +// TODO: OpenACC: Implement 'global', NS lowering. + +struct Struct { + static const HasSideEffects StaticMemHSE; + static const HasSideEffects StaticMemHSEArr[5]; + static const int StaticMemInt; + + // TODO: OpenACC: Implement static-local lowering. + + void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + int LocalInt; + +#pragma acc declare copyout(always:ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1]) + // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) 
-> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> 
{dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr) to varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_INT_CREATE]] : !cir.ptr) to varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr) to varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_INT_CREATE]] : !cir.ptr) to varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr>) bounds(%[[BOUND1]]) to varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) {modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr>) bounds(%[[BOUND2]]) to varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) {modifiers = #acc, name = "LocalHSEArr[1:1]"} + } + void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr); +}; + +void use() { + Struct s; + s.MemFunc1(HasSideEffects{}, 0, nullptr); +} + +void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: 
cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare copyout(alwaysout:ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: 
%[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + +#pragma acc declare copyout(alwaysout:LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr) to varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_INT_CREATE]] : !cir.ptr) to varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_ARR_CREATE]] : 
!cir.ptr>) bounds(%[[BOUND2]]) to varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) {modifiers = #acc, name = "LocalHSEArr[1:1]"} + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr) to varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_INT_CREATE]] : !cir.ptr) to varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr>) bounds(%[[BOUND1]]) to varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) {modifiers = #acc, name = "ArgHSEPtr[1:1]"} +} + +extern "C" void do_thing(); + +extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare copyout(always:ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, 
modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + { + // CHECK-NEXT: cir.scope { +#pragma acc declare copyout(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] 
: si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {dataClause = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr) to varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) {name = "LocalHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_INT_CREATE]] : !cir.ptr) to varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) {name = "LocalInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr>) bounds(%[[BOUND2]]) to varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) {name = "LocalHSEArr[1:1]"} + } + // CHECK-NEXT: } + + // Make sure that cleanup gets put in the right scope. 
+ do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr) to varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_INT_CREATE]] : !cir.ptr) to varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) {modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr>) bounds(%[[BOUND1]]) to varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) {modifiers = #acc, name = "ArgHSEPtr[1:1]"} +} + diff --git a/clang/test/CIR/CodeGenOpenACC/declare-create.cpp b/clang/test/CIR/CodeGenOpenACC/declare-create.cpp new file mode 100644 index 0000000000000..ef2f1de19ea96 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/declare-create.cpp @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct HasSideEffects { + HasSideEffects(); + ~HasSideEffects(); +}; + +// TODO: OpenACC: Implement 'global', NS lowering. + +struct Struct { + static const HasSideEffects StaticMemHSE; + static const HasSideEffects StaticMemHSEArr[5]; + static const int StaticMemInt; + + // TODO: OpenACC: Implement static-local lowering. 
+ + void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + int LocalInt; + +#pragma acc declare create(zero:ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1]) + // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {modifiers = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = 
"ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + } + void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr); +}; + +void use() { + Struct s; + s.MemFunc1(HasSideEffects{}, 0, nullptr); +} + +void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare create(zero:ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : 
!cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + +#pragma acc declare create(zero:LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) 
startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {modifiers = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} +} + +extern "C" void do_thing(); + +extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = 
cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare create(zero:ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + { + // CHECK-NEXT: cir.scope { +#pragma acc declare create(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name 
= "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_CREATE]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + // CHECK-NEXT: } + + // Make sure that cleanup gets put in the right scope. 
+ do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_CREATE]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} +} + diff --git a/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp b/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp new file mode 100644 index 0000000000000..dbec4f22a1bb3 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct HasSideEffects { + HasSideEffects(); + ~HasSideEffects(); +}; + +// TODO: OpenACC: Implement 'global', NS lowering. + +struct Struct { + static const HasSideEffects StaticMemHSE; + static const HasSideEffects StaticMemHSEArr[5]; + static const int StaticMemInt; + + // TODO: OpenACC: Implement static-local lowering. 
+ + void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + int LocalInt; + +#pragma acc declare device_resident(ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1]) + // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // 
CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr); +}; + +void use() { + Struct s; + s.MemFunc1(HasSideEffects{}, 0, nullptr); +} + +void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare device_resident(ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: 
%[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + +#pragma acc declare device_resident(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_DEV_RES:.*]] = 
acc.declare_device_resident varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} +} + +extern "C" void do_thing(); + +extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: 
%[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare device_resident(ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + { + // CHECK-NEXT: cir.scope { +#pragma acc declare device_resident(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident 
varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + // CHECK-NEXT: } + + // Make sure that cleanup gets put in the right scope. 
+ do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} +} + diff --git a/clang/test/CIR/CodeGenOpenACC/declare-present.cpp b/clang/test/CIR/CodeGenOpenACC/declare-present.cpp new file mode 100644 index 0000000000000..c17b9597adf12 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/declare-present.cpp @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct HasSideEffects { + HasSideEffects(); + ~HasSideEffects(); +}; + +// TODO: OpenACC: Implement 'global', NS lowering. + +struct Struct { + static const HasSideEffects StaticMemHSE; + static const HasSideEffects StaticMemHSEArr[5]; + static const int StaticMemInt; + + // TODO: OpenACC: Implement static-local lowering. 
+ + void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + int LocalInt; + +#pragma acc declare present(ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1]) + // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = 
arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete 
accPtr(%[[LOC_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr); +}; + +void use() { + Struct s; + s.MemFunc1(HasSideEffects{}, 0, nullptr); +} + +void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare present(ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : 
!s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + +#pragma acc declare present(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], 
%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} +} + +extern "C" void do_thing(); + +extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + HasSideEffects LocalHSE; + // 
CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare present(ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + { + // CHECK-NEXT: cir.scope { +#pragma acc declare present(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // 
CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + // CHECK-NEXT: } + + // Make sure that cleanup gets put in the right scope. 
+ do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} +} + diff --git a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp index c8b85a12f84e7..43d91f180acaf 100644 --- a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp +++ b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp @@ -1,8 +1,5 @@ // RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-cir %s -o %t.cir -verify -void HelloWorld(int *A) { - extern int *E; - -// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: create}} +int E, A; +// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Declare Construct}} #pragma acc declare link(E) create(A) -} diff --git a/clang/test/CodeGen/attr-target-clones.c b/clang/test/CodeGen/attr-target-clones.c index 295b25d6478eb..56db77c2b09a3 100644 --- a/clang/test/CodeGen/attr-target-clones.c +++ b/clang/test/CodeGen/attr-target-clones.c @@ -125,6 +125,35 @@ void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {} // WINDOWS: musttail call void @unused.arch_ivybridge.0 // WINDOWS: musttail call void @unused.default.1 +int __attribute__((target_clones("sse4.2, default"))) inherited(void); +int inherited(void) { return 0; } +// LINUX: define {{.*}}i32 @inherited.sse4.2.0() +// LINUX: define {{.*}}i32 @inherited.default.1() +// LINUX: define weak_odr ptr @inherited.resolver() #[[ATTR_RESOLVER]] comdat 
+// LINUX: ret ptr @inherited.sse4.2.0 +// LINUX: ret ptr @inherited.default.1 + +// DARWIN: define {{.*}}i32 @inherited.sse4.2.0() +// DARWIN: define {{.*}}i32 @inherited.default.1() +// DARWIN: define weak_odr ptr @inherited.resolver() #[[ATTR_RESOLVER]] { +// DARWIN: ret ptr @inherited.sse4.2.0 +// DARWIN: ret ptr @inherited.default.1 + +// WINDOWS: define dso_local i32 @inherited.sse4.2.0() +// WINDOWS: define dso_local i32 @inherited.default.1() +// WINDOWS: define weak_odr dso_local i32 @inherited() #[[ATTR_RESOLVER]] comdat +// WINDOWS: musttail call i32 @inherited.sse4.2.0 +// WINDOWS: musttail call i32 @inherited.default.1 + +int test_inherited(void) { + // LINUX: define {{.*}}i32 @test_inherited() #[[DEF:[0-9]+]] + // DARWIN: define {{.*}}i32 @test_inherited() #[[DEF:[0-9]+]] + // WINDOWS: define dso_local i32 @test_inherited() #[[DEF:[0-9]+]] + return inherited(); + // LINUX: call i32 @inherited() + // DARWIN: call i32 @inherited() + // WINDOWS: call i32 @inherited() +} inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2"))) foo_inline(void) { return 0; } diff --git a/clang/test/Sema/attr-target-clones.c b/clang/test/Sema/attr-target-clones.c index 4597ea54d02bf..40688772eeb96 100644 --- a/clang/test/Sema/attr-target-clones.c +++ b/clang/test/Sema/attr-target-clones.c @@ -28,6 +28,17 @@ int __attribute__((target_clones("sse4.2", "arch=atom", "default"))) redecl4(voi int __attribute__((target_clones("sse4.2", "arch=sandybridge", "default"))) redecl4(void) { return 1; } +int __attribute__((target_clones("sse4.2", "default"))) redecl5(void); +int redecl5(void) { return 1; } + +int redecl6(void); +int __attribute__((target_clones("sse4.2", "default"))) redecl6(void) { return 1; } + +int __attribute__((target_clones("sse4.2", "default"))) redecl7(void); +// expected-error@+2 {{multiversioning attributes cannot be combined}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target("sse4.2"))) redecl7(void) { return 
1; } + int __attribute__((target("sse4.2"))) redef2(void) { return 1; } // expected-error@+2 {{multiversioning attributes cannot be combined}} // expected-note@-2 {{previous declaration is here}} @@ -87,6 +98,8 @@ int useage(void) { int __attribute__((target_clones("sse4.2", "default"))) mv_after_use(void) { return 1; } void bad_overload1(void) __attribute__((target_clones("mmx", "sse4.2", "default"))); +// expected-error@+2 {{conflicting types for 'bad_overload1'}} +// expected-note@-2 {{previous declaration is here}} void bad_overload1(int p) {} void bad_overload2(int p) {} diff --git a/clang/unittests/AST/TypePrinterTest.cpp b/clang/unittests/AST/TypePrinterTest.cpp index 410ec021d6e72..3cadf9b265bd1 100644 --- a/clang/unittests/AST/TypePrinterTest.cpp +++ b/clang/unittests/AST/TypePrinterTest.cpp @@ -341,3 +341,22 @@ TEST(TypePrinter, NestedNameSpecifiers) { Policy.AnonymousTagLocations = false; })); } + +TEST(TypePrinter, NestedNameSpecifiersTypedef) { + constexpr char Code[] = R"cpp( + typedef union { + struct { + struct { + unsigned int baz; + } bar; + }; + } foo; + )cpp"; + + ASSERT_TRUE(PrintedTypeMatches( + Code, {}, fieldDecl(hasName("bar"), hasType(qualType().bind("id"))), + "struct foo::(anonymous struct)::(unnamed)", [](PrintingPolicy &Policy) { + Policy.FullyQualifiedName = true; + Policy.AnonymousTagLocations = false; + })); +} diff --git a/flang/test/Transforms/OpenACC/acc-implicit-data.fir b/flang/test/Transforms/OpenACC/acc-implicit-data.fir index 7f6a57cb4d8c6..2d28c341d0d5e 100644 --- a/flang/test/Transforms/OpenACC/acc-implicit-data.fir +++ b/flang/test/Transforms/OpenACC/acc-implicit-data.fir @@ -133,7 +133,7 @@ func.func @test_fir_derivedtype_in_parallel_defaultpresent() { return } -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {implicit = true, name = "aggrvar"} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {acc.from_default, implicit = true, name = "aggrvar"} // CHECK: 
acc.delete accPtr(%[[PRESENT]] : !fir.ref>) {dataClause = #acc, implicit = true, name = "aggrvar"} // ----- @@ -147,7 +147,7 @@ func.func @test_fir_derivedtype_in_kernels_defaultpresent() { return } -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {implicit = true, name = "aggrvar"} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {acc.from_default, implicit = true, name = "aggrvar"} // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref>) {dataClause = #acc, implicit = true, name = "aggrvar"} // ----- @@ -161,7 +161,7 @@ func.func @test_fir_array_in_parallel_defaultpresent() { return } -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {implicit = true, name = "arrayvar"} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {acc.from_default, implicit = true, name = "arrayvar"} // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref>) {dataClause = #acc, implicit = true, name = "arrayvar"} // ----- @@ -175,7 +175,7 @@ func.func @test_fir_array_in_kernels_defaultpresent() { return } -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {implicit = true, name = "arrayvar"} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {acc.from_default, implicit = true, name = "arrayvar"} // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref>) {dataClause = #acc, implicit = true, name = "arrayvar"} // ----- diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst index b8e0e9b5a1814..6ea323ced3003 100644 --- a/libcxx/docs/ReleaseNotes/22.rst +++ b/libcxx/docs/ReleaseNotes/22.rst @@ -83,6 +83,8 @@ Improvements and New Features iterators, resulting in a performance improvement for ``std::deque`` and ``std::join_view>>`` iterators. +- The ``num_get::do_get`` integral overloads have been optimized, resulting in a performance improvement of up to 2.8x. 
+ Deprecations and Removals ------------------------- diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 4b2713191c1c0..f289666ec12ab 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -520,7 +520,6 @@ set(files __locale_dir/locale_base_api.h __locale_dir/locale_base_api/bsd_locale_fallbacks.h __locale_dir/locale_base_api/ibm.h - __locale_dir/locale_base_api/musl.h __locale_dir/locale_base_api/openbsd.h __locale_dir/messages.h __locale_dir/money.h diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h index aaeb8a881df18..f73c9ea4b6ea7 100644 --- a/libcxx/include/__algorithm/simd_utils.h +++ b/libcxx/include/__algorithm/simd_utils.h @@ -114,6 +114,27 @@ template }(make_index_sequence<__simd_vector_size_v<_VecT>>{}); } +// Load the first _Np elements, zero the rest +_LIBCPP_DIAGNOSTIC_PUSH +_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi") +template +[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __partial_load(_Iter __iter) noexcept { + return [=]( + index_sequence<_LoadIndices...>, index_sequence<_ZeroIndices...>) _LIBCPP_ALWAYS_INLINE noexcept { + return _VecT{__iter[_LoadIndices]..., ((void)_ZeroIndices, 0)...}; + }(make_index_sequence<_Np>{}, make_index_sequence<__simd_vector_size_v<_VecT> - _Np>{}); +} + +// Create a vector where every element is __val +template +[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT +__broadcast(__simd_vector_underlying_type_t<_VecT> __val) { + return [&](index_sequence<_Indices...>) { + return _VecT{((void)_Indices, __val)...}; + }(make_index_sequence<__simd_vector_size_v<_VecT>>()); +} +_LIBCPP_DIAGNOSTIC_POP + template [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __any_of(__simd_vector<_Tp, _Np> __vec) noexcept { return __builtin_reduce_or(__builtin_convertvector(__vec, __simd_vector)); @@ -124,6 +145,11 @@ template return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector)); }
+template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __none_of(__simd_vector<_Tp, _Np> __vec) noexcept { + return !__builtin_reduce_or(__builtin_convertvector(__vec, __simd_vector)); +} + template [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept { using __mask_vec = __simd_vector; diff --git a/libcxx/include/__config b/libcxx/include/__config index d79ace0cbb896..1b27f28f9ddef 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -546,7 +546,10 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_(m) # endif -# if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) +// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be +// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that +// allows suppression in system headers. +# if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) && 0 # define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 1 # else # define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0 diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h index fef90bb77991f..d26d529d4e0c2 100644 --- a/libcxx/include/__locale_dir/locale_base_api.h +++ b/libcxx/include/__locale_dir/locale_base_api.h @@ -57,8 +57,6 @@ // float __strtof(const char*, char**, __locale_t); // double __strtod(const char*, char**, __locale_t); // long double __strtold(const char*, char**, __locale_t); -// long long __strtoll(const char*, char**, __locale_t); -// unsigned long long __strtoull(const char*, char**, __locale_t); // } // // Character manipulation functions @@ -104,7 +102,6 @@ // // int __snprintf(char*, size_t, __locale_t, const char*, ...); // required by the headers // int __asprintf(char**, __locale_t, const char*, ...); // required by the headers -// int __sscanf(const char*, 
__locale_t, const char*, ...); // required by the headers // } #if _LIBCPP_HAS_LOCALIZATION @@ -131,8 +128,6 @@ # include <__locale_dir/locale_base_api/ibm.h> # elif defined(__OpenBSD__) # include <__locale_dir/locale_base_api/openbsd.h> -# elif defined(__wasi__) || _LIBCPP_HAS_MUSL_LIBC -# include <__locale_dir/locale_base_api/musl.h> # endif # include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h> @@ -192,15 +187,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ return strtold_l(__nptr, __endptr, __loc); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return strtoll_l(__nptr, __endptr, __base, __loc); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return strtoull_l(__nptr, __endptr, __base, __loc); -} - // // Character manipulation functions // @@ -299,11 +285,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __ char** __s, __locale_t __loc, const char* __format, _Args&&... __args) { return std::__libcpp_asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); } -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __s, __locale_t __loc, const char* __format, _Args&&... 
__args) { - return std::__libcpp_sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...); -} _LIBCPP_DIAGNOSTIC_POP # undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h index b62a1b737e97f..8cdbe0cd15051 100644 --- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h +++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h @@ -125,16 +125,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l( return __res; } -inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l( - const char* __s, locale_t __l, const char* __format, ...) { - va_list __va; - va_start(__va, __format); - __locale_guard __current(__l); - int __res = vsscanf(__s, __format, __va); - va_end(__va); - return __res; -} - _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H diff --git a/libcxx/include/__locale_dir/locale_base_api/ibm.h b/libcxx/include/__locale_dir/locale_base_api/ibm.h index 1d1d15df9f799..47a83eac7df36 100644 --- a/libcxx/include/__locale_dir/locale_base_api/ibm.h +++ b/libcxx/include/__locale_dir/locale_base_api/ibm.h @@ -53,11 +53,6 @@ struct __setAndRestore { // The following are not POSIX routines. 
These are quick-and-dirty hacks // to make things pretend to work -inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t locale) { - __setAndRestore __newloc(locale); - return ::strtoll(__nptr, __endptr, __base); -} - inline _LIBCPP_HIDE_FROM_ABI double strtod_l(const char* __nptr, char** __endptr, locale_t locale) { __setAndRestore __newloc(locale); return ::strtod(__nptr, __endptr); @@ -73,12 +68,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double strtold_l(const char* __nptr, char** __ return ::strtold(__nptr, __endptr); } -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t locale) { - __setAndRestore __newloc(locale); - return ::strtoull(__nptr, __endptr, __base); -} - inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char* fmt, va_list ap) { const size_t buff_size = 256; diff --git a/libcxx/include/__locale_dir/locale_base_api/musl.h b/libcxx/include/__locale_dir/locale_base_api/musl.h deleted file mode 100644 index 1653214cdba1e..0000000000000 --- a/libcxx/include/__locale_dir/locale_base_api/musl.h +++ /dev/null @@ -1,31 +0,0 @@ -// -*- C++ -*- -//===-----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This adds support for the extended locale functions that are currently -// missing from the Musl C library. -// -// This only works when the specified locale is "C" or "POSIX", but that's -// about as good as we can do without implementing full xlocale support -// in Musl. 
-//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H -#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H - -#include -#include - -inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t) { - return ::strtoll(__nptr, __endptr, __base); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t) { - return ::strtoull(__nptr, __endptr, __base); -} - -#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H diff --git a/libcxx/include/__locale_dir/num.h b/libcxx/include/__locale_dir/num.h index 6eef9b3095d6e..98b8eb0c600f5 100644 --- a/libcxx/include/__locale_dir/num.h +++ b/libcxx/include/__locale_dir/num.h @@ -12,6 +12,7 @@ #include <__algorithm/copy.h> #include <__algorithm/find.h> #include <__algorithm/reverse.h> +#include <__algorithm/simd_utils.h> #include <__charconv/to_chars_integral.h> #include <__charconv/traits.h> #include <__config> @@ -48,9 +49,9 @@ struct _LIBCPP_EXPORTED_FROM_ABI __num_get_base { static int __get_base(ios_base&); static const char __src[33]; // "0123456789abcdefABCDEFxX+-pPiInN" // count of leading characters in __src used for parsing integers ("012..X+-") - static const size_t __int_chr_cnt = 26; + static inline const size_t __int_chr_cnt = 26; // count of leading characters in __src used for parsing floating-point values ("012..-pP") - static const size_t __fp_chr_cnt = 28; + static inline const size_t __fp_chr_cnt = 28; }; template @@ -73,7 +74,8 @@ struct __num_get : protected __num_get_base { [[__deprecated__("This exists only for ABI compatibility")]] static string __stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep); - static int __stage2_int_loop( + + [[__deprecated__("This exists only for ABI compatibility")]] static int __stage2_int_loop( _CharT __ct, int __base, char* __a, @@ -85,11 +87,24 @@ struct __num_get 
: protected __num_get_base { unsigned*& __g_end, _CharT* __atoms); - _LIBCPP_HIDE_FROM_ABI static string __stage2_int_prep(ios_base& __iob, _CharT& __thousands_sep) { - locale __loc = __iob.getloc(); - const numpunct<_CharT>& __np = use_facet >(__loc); - __thousands_sep = __np.thousands_sep(); - return __np.grouping(); + _LIBCPP_HIDE_FROM_ABI static ptrdiff_t __atoms_offset(const _CharT* __atoms, _CharT __val) { + // TODO: Remove the manual vectorization once https://llvm.org/PR168551 is resolved +# if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS + if constexpr (is_same<_CharT, char>::value) { + // TODO(LLVM 24): This can be removed, since -Wpsabi doesn't warn on [[gnu::always_inline]] functions anymore. + _LIBCPP_DIAGNOSTIC_PUSH + _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi") + using __vec = __simd_vector; + __vec __chars = std::__broadcast<__vec>(__val); + __vec __cmp = std::__partial_load<__vec, __int_chr_cnt>(__atoms); + auto __res = __chars == __cmp; + if (std::__none_of(__res)) + return __int_chr_cnt; + return std::min(__int_chr_cnt, std::__find_first_set(__res)); + _LIBCPP_DIAGNOSTIC_POP + } +# endif + return std::find(__atoms, __atoms + __int_chr_cnt, __val) - __atoms; } _LIBCPP_HIDE_FROM_ABI const _CharT* __do_widen(ios_base& __iob, _CharT* __atoms) const { @@ -122,54 +137,6 @@ string __num_get<_CharT>::__stage2_float_prep( return __np.grouping(); } -template -int __num_get<_CharT>::__stage2_int_loop( - _CharT __ct, - int __base, - char* __a, - char*& __a_end, - unsigned& __dc, - _CharT __thousands_sep, - const string& __grouping, - unsigned* __g, - unsigned*& __g_end, - _CharT* __atoms) { - if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) { - *__a_end++ = __ct == __atoms[24] ? 
'+' : '-'; - __dc = 0; - return 0; - } - if (__grouping.size() != 0 && __ct == __thousands_sep) { - if (__g_end - __g < __num_get_buf_sz) { - *__g_end++ = __dc; - __dc = 0; - } - return 0; - } - ptrdiff_t __f = std::find(__atoms, __atoms + __int_chr_cnt, __ct) - __atoms; - if (__f >= 24) - return -1; - switch (__base) { - case 8: - case 10: - if (__f >= __base) - return -1; - break; - case 16: - if (__f < 22) - break; - if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') { - __dc = 0; - *__a_end++ = __src[__f]; - return 0; - } - return -1; - } - *__a_end++ = __src[__f]; - ++__dc; - return 0; -} - template int __num_get<_CharT>::__stage2_float_loop( _CharT __ct, @@ -274,65 +241,6 @@ _LIBCPP_HIDE_FROM_ABI _Tp __num_get_float(const char* __a, const char* __a_end, return 0; } -template -_LIBCPP_HIDE_FROM_ABI _Tp -__num_get_signed_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) { - if (__a != __a_end) { - __libcpp_remove_reference_t __save_errno = errno; - errno = 0; - char* __p2; - long long __ll = __locale::__strtoll(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE); - __libcpp_remove_reference_t __current_errno = errno; - if (__current_errno == 0) - errno = __save_errno; - if (__p2 != __a_end) { - __err = ios_base::failbit; - return 0; - } else if (__current_errno == ERANGE || __ll < numeric_limits<_Tp>::min() || numeric_limits<_Tp>::max() < __ll) { - __err = ios_base::failbit; - if (__ll > 0) - return numeric_limits<_Tp>::max(); - else - return numeric_limits<_Tp>::min(); - } - return static_cast<_Tp>(__ll); - } - __err = ios_base::failbit; - return 0; -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__num_get_unsigned_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) { - if (__a != __a_end) { - const bool __negate = *__a == '-'; - if (__negate && ++__a == __a_end) { - __err = ios_base::failbit; - return 0; - } - __libcpp_remove_reference_t __save_errno = errno; - errno = 0; - char* __p2; - 
unsigned long long __ll = __locale::__strtoull(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE); - __libcpp_remove_reference_t __current_errno = errno; - if (__current_errno == 0) - errno = __save_errno; - if (__p2 != __a_end) { - __err = ios_base::failbit; - return 0; - } else if (__current_errno == ERANGE || numeric_limits<_Tp>::max() < __ll) { - __err = ios_base::failbit; - return numeric_limits<_Tp>::max(); - } - _Tp __res = static_cast<_Tp>(__ll); - if (__negate) - __res = -__res; - return __res; - } - __err = ios_base::failbit; - return 0; -} - template > class num_get : public locale::facet, private __num_get<_CharT> { public: @@ -470,137 +378,194 @@ class num_get : public locale::facet, private __num_get<_CharT> { return __b; } - template - _LIBCPP_HIDE_FROM_ABI iter_type - __do_get_signed(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Signed& __v) const { + template + iter_type __do_get_integral( + iter_type __first, iter_type __last, ios_base& __iob, ios_base::iostate& __err, _MaybeSigned& __v) const { + using _Unsigned = __make_unsigned_t<_MaybeSigned>; + // Stage 1 int __base = this->__get_base(__iob); - // Stage 2 - char_type __thousands_sep; - const int __atoms_size = __num_get_base::__int_chr_cnt; - char_type __atoms1[__atoms_size]; - const char_type* __atoms = this->__do_widen(__iob, __atoms1); - string __grouping = this->__stage2_int_prep(__iob, __thousands_sep); - string __buf; - __buf.resize(__buf.capacity()); - char* __a = &__buf[0]; - char* __a_end = __a; + + // Stages 2 & 3 + // These are combined into a single step where we parse the characters and calculate the value in one go instead of + // storing the relevant characters first (in an allocated buffer) and parse the characters after we extracted them. + // This makes the whole process significantly faster, since we avoid potential allocations and copies. 
+ + const auto& __numpunct = use_facet >(__iob.getloc()); + char_type __thousands_sep = __numpunct.thousands_sep(); + string __grouping = __numpunct.grouping(); + + char_type __atoms_buffer[__num_get_base::__int_chr_cnt]; + const char_type* __atoms = this->__do_widen(__iob, __atoms_buffer); unsigned __g[__num_get_base::__num_get_buf_sz]; unsigned* __g_end = __g; unsigned __dc = 0; - for (; __b != __e; ++__b) { - if (__a_end == __a + __buf.size()) { - size_t __tmp = __buf.size(); - __buf.resize(2 * __buf.size()); - __buf.resize(__buf.capacity()); - __a = &__buf[0]; - __a_end = __a + __tmp; + + if (__first == __last) { + __err |= ios_base::eofbit | ios_base::failbit; + __v = 0; + return __first; + } + + while (!__grouping.empty() && *__first == __thousands_sep) { + ++__first; + if (__g_end - __g < this->__num_get_buf_sz) + *__g_end++ = 0; + } + + bool __negate = false; + // __c == '+' || __c == '-' + if (auto __c = *__first; __c == __atoms[24] || __c == __atoms[25]) { + __negate = __c == __atoms[25]; + ++__first; + } + + if (__first == __last) { + __err |= ios_base::eofbit | ios_base::failbit; + __v = 0; + return __first; + } + + bool __parsed_num = false; + + // If we don't have a pre-set base, figure it out and swallow any prefix + if (__base == 0) { + auto __c = *__first; + // __c == '0' + if (__c == __atoms[0]) { + ++__first; + if (__first == __last) { + __err |= ios_base::eofbit; + return __first; + } + // __c2 == 'x' || __c2 == 'X' + if (auto __c2 = *__first; __c2 == __atoms[22] || __c2 == __atoms[23]) { + __base = 16; + ++__first; + } else { + __base = 8; + } + } else { + __base = 10; + } + + // If the base has been specified explicitly, try to swallow the appropriate prefix. We only need to do something + // special for hex, since decimal has no prefix and octal's prefix is '0', which doesn't change the value that + // we'll parse if we don't swallow it. 
+ } else if (__base == 16) { + // Try to swallow '0x' + + // *__first == '0' + if (*__first == __atoms[0]) { + ++__first; + if (__first == __last) { + __err |= ios_base::eofbit; + __v = 0; + return __first; + } + // __c == 'x' || __c == 'X' + if (auto __c = *__first; __c == __atoms[22] || __c == __atoms[23]) + ++__first; + else + __parsed_num = true; // We only swallowed '0', so we've started to parse a number } - if (this->__stage2_int_loop( - *__b, - __base, - __a, - __a_end, - __dc, - __thousands_sep, - __grouping, - __g, - __g_end, - const_cast(__atoms))) - break; } - if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz) - *__g_end++ = __dc; - // Stage 3 - __v = std::__num_get_signed_integral<_Signed>(__a, __a_end, __err, __base); - // Digit grouping checked - __check_grouping(__grouping, __g, __g_end, __err); - // EOF checked - if (__b == __e) - __err |= ios_base::eofbit; - return __b; - } - template - _LIBCPP_HIDE_FROM_ABI iter_type - __do_get_unsigned(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Unsigned& __v) const { - // Stage 1 - int __base = this->__get_base(__iob); - // Stage 2 - char_type __thousands_sep; - const int __atoms_size = __num_get_base::__int_chr_cnt; - char_type __atoms1[__atoms_size]; - const char_type* __atoms = this->__do_widen(__iob, __atoms1); - string __grouping = this->__stage2_int_prep(__iob, __thousands_sep); - string __buf; - __buf.resize(__buf.capacity()); - char* __a = &__buf[0]; - char* __a_end = __a; - unsigned __g[__num_get_base::__num_get_buf_sz]; - unsigned* __g_end = __g; - unsigned __dc = 0; - for (; __b != __e; ++__b) { - if (__a_end == __a + __buf.size()) { - size_t __tmp = __buf.size(); - __buf.resize(2 * __buf.size()); - __buf.resize(__buf.capacity()); - __a = &__buf[0]; - __a_end = __a + __tmp; + // Calculate the actual number + _Unsigned __val = 0; + bool __overflowed = false; + for (; __first != __last; ++__first) { + auto __c = *__first; + if 
(!__grouping.empty() && __c == __thousands_sep) { + if (__g_end - __g < this->__num_get_buf_sz) { + *__g_end++ = __dc; + __dc = 0; + } + continue; } - if (this->__stage2_int_loop( - *__b, - __base, - __a, - __a_end, - __dc, - __thousands_sep, - __grouping, - __g, - __g_end, - const_cast(__atoms))) + auto __offset = this->__atoms_offset(__atoms, __c); + if (__offset >= 22) // Not a valid integer character + break; + + if (__base == 16 && __offset >= 16) + __offset -= 6; + if (__offset >= __base) break; + // __val = (__val * __base) + __offset + __overflowed |= __builtin_mul_overflow(__val, __base, std::addressof(__val)) || + __builtin_add_overflow(__val, __offset, std::addressof(__val)); + __parsed_num = true; + ++__dc; } + + if (!__parsed_num) { + __err |= ios_base::failbit; + __v = 0; + } else if (__overflowed) { + __err |= ios_base::failbit; + __v = is_signed<_MaybeSigned>::value && __negate + ? numeric_limits<_MaybeSigned>::min() + : numeric_limits<_MaybeSigned>::max(); + } else if (!__negate) { + if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max())) { + __err |= ios_base::failbit; + __v = numeric_limits<_MaybeSigned>::max(); + } else { + __v = __val; + } + } else if (is_signed<_MaybeSigned>::value) { + if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) { + __err |= ios_base::failbit; + __v = numeric_limits<_MaybeSigned>::min(); + } else if (__val == static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) { + __v = numeric_limits<_MaybeSigned>::min(); + } else { + __v = -__val; + } + } else { + __v = -__val; + } + if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz) *__g_end++ = __dc; - // Stage 3 - __v = std::__num_get_unsigned_integral<_Unsigned>(__a, __a_end, __err, __base); + // Digit grouping checked __check_grouping(__grouping, __g, __g_end, __err); // EOF checked - if (__b == __e) + if (__first == __last) __err |= ios_base::eofbit; - return __b; + return __first; } virtual 
iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, bool& __v) const; virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long& __v) const { - return this->__do_get_signed(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long long& __v) const { - return this->__do_get_signed(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned short& __v) const { - return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned int& __v) const { - return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long& __v) const { - return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long long& __v) const { - return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, float& __v) const { @@ -654,40 +619,13 @@ _InputIterator num_get<_CharT, _InputIterator>::do_get( template _InputIterator num_get<_CharT, _InputIterator>::do_get( iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, void*& __v) const { - // Stage 1 - int __base 
= 16; - // Stage 2 - char_type __atoms[__num_get_base::__int_chr_cnt]; - char_type __thousands_sep = char_type(); - string __grouping; - std::use_facet >(__iob.getloc()) - .widen(__num_get_base::__src, __num_get_base::__src + __num_get_base::__int_chr_cnt, __atoms); - string __buf; - __buf.resize(__buf.capacity()); - char* __a = &__buf[0]; - char* __a_end = __a; - unsigned __g[__num_get_base::__num_get_buf_sz]; - unsigned* __g_end = __g; - unsigned __dc = 0; - for (; __b != __e; ++__b) { - if (__a_end == __a + __buf.size()) { - size_t __tmp = __buf.size(); - __buf.resize(2 * __buf.size()); - __buf.resize(__buf.capacity()); - __a = &__buf[0]; - __a_end = __a + __tmp; - } - if (this->__stage2_int_loop(*__b, __base, __a, __a_end, __dc, __thousands_sep, __grouping, __g, __g_end, __atoms)) - break; - } - // Stage 3 - __buf.resize(__a_end - __a); - if (__locale::__sscanf(__buf.c_str(), _LIBCPP_GET_C_LOCALE, "%p", &__v) != 1) - __err = ios_base::failbit; - // EOF checked - if (__b == __e) - __err |= ios_base::eofbit; - return __b; + auto __flags = __iob.flags(); + __iob.flags((__flags & ~ios_base::basefield & ~ios_base::uppercase) | ios_base::hex); + uintptr_t __ptr; + auto __res = __do_get_integral(__b, __e, __iob, __err, __ptr); + __iob.flags(__flags); + __v = reinterpret_cast(__ptr); + return __res; } extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get; diff --git a/libcxx/include/__locale_dir/support/bsd_like.h b/libcxx/include/__locale_dir/support/bsd_like.h index 27735529d5524..6f533b4e1eab1 100644 --- a/libcxx/include/__locale_dir/support/bsd_like.h +++ b/libcxx/include/__locale_dir/support/bsd_like.h @@ -79,15 +79,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ return ::strtold_l(__nptr, __endptr, __loc); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return ::strtoll_l(__nptr, __endptr, __base, __loc); -} - -inline _LIBCPP_HIDE_FROM_ABI 
unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return ::strtoull_l(__nptr, __endptr, __base, __loc); -} - // // Character manipulation functions // @@ -211,12 +202,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __ char** __s, __locale_t __loc, const char* __format, _Args&&... __args) { return ::asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); // non-standard } - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) { - return ::sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...); -} _LIBCPP_DIAGNOSTIC_POP #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT diff --git a/libcxx/include/__locale_dir/support/fuchsia.h b/libcxx/include/__locale_dir/support/fuchsia.h index 4b9e63facb19e..528bfeb0cb6e1 100644 --- a/libcxx/include/__locale_dir/support/fuchsia.h +++ b/libcxx/include/__locale_dir/support/fuchsia.h @@ -141,13 +141,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __ __locale_guard __current(__loc); return ::asprintf(__s, __format, std::forward<_Args>(__args)...); // non-standard } -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __s, __locale_t __loc, const char* __format, _Args&&... 
__args) { - __locale_guard __current(__loc); - return std::sscanf(__s, __format, std::forward<_Args>(__args)...); -} - _LIBCPP_DIAGNOSTIC_POP #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h index 94a2ecb9a940d..1a589be49bf1d 100644 --- a/libcxx/include/__locale_dir/support/linux.h +++ b/libcxx/include/__locale_dir/support/linux.h @@ -94,25 +94,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ return ::strtold_l(__nptr, __endptr, __loc); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { -#if !_LIBCPP_HAS_MUSL_LIBC - return ::strtoll_l(__nptr, __endptr, __base, __loc); -#else - (void)__loc; - return ::strtoll(__nptr, __endptr, __base); -#endif -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { -#if !_LIBCPP_HAS_MUSL_LIBC - return ::strtoull_l(__nptr, __endptr, __base, __loc); -#else - (void)__loc; - return ::strtoull(__nptr, __endptr, __base); -#endif -} - // // Character manipulation functions // @@ -257,20 +238,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf( va_end(__va); return __res; } - -#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs -_LIBCPP_HIDE_FROM_ABI -#endif -inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __s, __locale_t __loc, const char* __format, ...) 
{ - va_list __va; - va_start(__va, __format); - __locale_guard __current(__loc); - int __res = std::vsscanf(__s, __format, __va); - va_end(__va); - return __res; -} - } // namespace __locale _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__locale_dir/support/no_locale/strtonum.h b/libcxx/include/__locale_dir/support/no_locale/strtonum.h index 0e7a32993e736..59544e10e4a4c 100644 --- a/libcxx/include/__locale_dir/support/no_locale/strtonum.h +++ b/libcxx/include/__locale_dir/support/no_locale/strtonum.h @@ -34,15 +34,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ return std::strtold(__nptr, __endptr); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t) { - return std::strtoll(__nptr, __endptr, __base); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t) { - return std::strtoull(__nptr, __endptr, __base); -} - } // namespace __locale _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h index edd8a66c23e80..644ef68adf545 100644 --- a/libcxx/include/__locale_dir/support/windows.h +++ b/libcxx/include/__locale_dir/support/windows.h @@ -186,14 +186,6 @@ inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr return ::_strtod_l(__nptr, __endptr, __loc); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return ::_strtoi64_l(__nptr, __endptr, __base, __loc); -} -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return ::_strtoui64_l(__nptr, __endptr, __base, __loc); -} - // // Character manipulation functions // @@ -276,23 +268,6 @@ _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snpri _LIBCPP_EXPORTED_FROM_ABI 
_LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(char** __ret, __locale_t __loc, const char* __format, ...); -_LIBCPP_DIAGNOSTIC_PUSH -_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") -_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates -#ifdef _LIBCPP_COMPILER_CLANG_BASED -# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__) -#else -# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */ -#endif - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __dest, __locale_t __loc, const char* __format, _Args&&... __args) { - return ::_sscanf_l(__dest, __format, __loc, std::forward<_Args>(__args)...); -} -_LIBCPP_DIAGNOSTIC_POP -#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT - #if defined(_LIBCPP_BUILDING_LIBRARY) struct __locale_guard { _LIBCPP_HIDE_FROM_ABI __locale_guard(__locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { diff --git a/libcxx/include/__support/xlocale/__strtonum_fallback.h b/libcxx/include/__support/xlocale/__strtonum_fallback.h index 5275aead35af9..90bd59d36c248 100644 --- a/libcxx/include/__support/xlocale/__strtonum_fallback.h +++ b/libcxx/include/__support/xlocale/__strtonum_fallback.h @@ -34,12 +34,4 @@ inline _LIBCPP_HIDE_FROM_ABI long double strtold_l(const char* __nptr, char** __ return ::strtold(__nptr, __endptr); } -inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t) { - return ::strtoll(__nptr, __endptr, __base); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t) { - return ::strtoull(__nptr, __endptr, __base); -} - #endif // _LIBCPP___SUPPORT_XLOCALE_STRTONUM_FALLBACK_H diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 57d66cd1ccaef..492708792cbbf 100644 --- 
a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -1591,7 +1591,6 @@ module std [system] { module locale_base_api { textual header "__locale_dir/locale_base_api/bsd_locale_fallbacks.h" textual header "__locale_dir/locale_base_api/ibm.h" - textual header "__locale_dir/locale_base_api/musl.h" textual header "__locale_dir/locale_base_api/openbsd.h" } export * diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 5ecaa3de7deba..5dd04a9ba8479 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -362,11 +362,11 @@ public: # endif // [string.view.iterators], iterators - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return cbegin(); } + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return cbegin(); } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return cend(); } + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return cend(); } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { # ifdef _LIBCPP_ABI_BOUNDED_ITERATORS return std::__make_bounded_iter(data(), data(), data() + size()); # else @@ -374,7 +374,7 @@ public: # endif } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { # ifdef _LIBCPP_ABI_BOUNDED_ITERATORS return std::__make_bounded_iter(data() + size(), data(), data() + size()); # else @@ -382,51 +382,54 @@ public: # endif } - _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator + rbegin() const _NOEXCEPT { return 
const_reverse_iterator(cend()); } - _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { return const_reverse_iterator(cbegin()); } - _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator + crbegin() const _NOEXCEPT { return const_reverse_iterator(cend()); } - _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return const_reverse_iterator(cbegin()); } // [string.view.capacity], capacity - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; } + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type length() const _NOEXCEPT { return __size_; } + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type length() const _NOEXCEPT { return __size_; } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return numeric_limits::max() / sizeof(value_type); } [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return __size_ == 0; } // [string.view.access], element access - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __pos) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference + operator[](size_type __pos) const _NOEXCEPT { return _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__pos < size(), "string_view[] index out of 
bounds"), __data_[__pos]; } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __pos) const { + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __pos) const { return __pos >= size() ? (__throw_out_of_range("string_view::at"), __data_[0]) : __data_[__pos]; } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT { return _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string_view::front(): string is empty"), __data_[0]; } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT { return _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string_view::back(): string is empty"), __data_[__size_ - 1]; } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_pointer data() const _NOEXCEPT { return __data_; } + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_pointer data() const _NOEXCEPT { return __data_; } // [string.view.modifiers], modifiers: _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI void remove_prefix(size_type __n) _NOEXCEPT { @@ -459,7 +462,8 @@ public: return __rlen; } - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view substr(size_type __pos = 0, size_type __n = npos) const { + [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view + substr(size_type __pos = 0, size_type __n = npos) const { // Use the `__assume_valid` form of the constructor to avoid an unnecessary check. Any substring of a view is a // valid view. In particular, `size()` is known to be smaller than `numeric_limits::max()`, so the // new size is also smaller. See also https://llvm.org/PR91634. 
@@ -474,7 +478,7 @@ public: } # endif - _LIBCPP_CONSTEXPR_SINCE_CXX14 int compare(basic_string_view __sv) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 int compare(basic_string_view __sv) const _NOEXCEPT { size_type __rlen = std::min(size(), __sv.size()); int __retval = _Traits::compare(data(), __sv.data(), __rlen); if (__retval == 0) // first __rlen chars matched @@ -482,50 +486,51 @@ public: return __retval; } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int compare(size_type __pos1, size_type __n1, basic_string_view __sv) const { return substr(__pos1, __n1).compare(__sv); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int compare(size_type __pos1, size_type __n1, basic_string_view __sv, size_type __pos2, size_type __n2) const { return substr(__pos1, __n1).compare(__sv.substr(__pos2, __n2)); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int compare(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s) const _NOEXCEPT { return compare(basic_string_view(__s)); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int compare(size_type __pos1, size_type __n1, const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s) const { return substr(__pos1, __n1).compare(basic_string_view(__s)); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int compare(size_type __pos1, size_type __n1, const _CharT* __s, size_type __n2) const _LIBCPP_DIAGNOSE_NULLPTR_IF(__n2 != 0 && __s == nullptr, " if n2 is not zero") { return substr(__pos1, __n1).compare(basic_string_view(__s, __n2)); } // find - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] 
_LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find(basic_string_view __s, size_type __pos = 0) const _NOEXCEPT { return std::__str_find(data(), size(), __s.data(), __pos, __s.size()); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find(_CharT __c, size_type __pos = 0) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + find(_CharT __c, size_type __pos = 0) const _NOEXCEPT { return std::__str_find(data(), size(), __c, __pos); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") { _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find(): received nullptr"); return std::__str_find(data(), size(), __s, __pos, __n); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT { _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find(): received nullptr"); return std::__str_find( @@ -533,24 +538,24 @@ public: } // rfind - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type rfind(basic_string_view __s, size_type __pos = npos) const _NOEXCEPT { return std::__str_rfind(data(), size(), __s.data(), __pos, __s.size()); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type rfind(_CharT __c, size_type __pos = npos) const _NOEXCEPT { return std::__str_rfind(data(), size(), __c, __pos); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] 
_LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type rfind(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") { _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::rfind(): received nullptr"); return std::__str_rfind(data(), size(), __s, __pos, __n); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type rfind(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT { _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::rfind(): received nullptr"); return std::__str_rfind( @@ -558,25 +563,25 @@ public: } // find_first_of - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_first_of(basic_string_view __s, size_type __pos = 0) const _NOEXCEPT { return std::__str_find_first_of( data(), size(), __s.data(), __pos, __s.size()); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_first_of(_CharT __c, size_type __pos = 0) const _NOEXCEPT { return find(__c, __pos); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_first_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") { _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_first_of(): received nullptr"); return std::__str_find_first_of(data(), size(), __s, __pos, __n); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_first_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type 
__pos = 0) const _NOEXCEPT { _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_first_of(): received nullptr"); return std::__str_find_first_of( @@ -584,25 +589,25 @@ public: } // find_last_of - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_last_of(basic_string_view __s, size_type __pos = npos) const _NOEXCEPT { return std::__str_find_last_of( data(), size(), __s.data(), __pos, __s.size()); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_last_of(_CharT __c, size_type __pos = npos) const _NOEXCEPT { return rfind(__c, __pos); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_last_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") { _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_last_of(): received nullptr"); return std::__str_find_last_of(data(), size(), __s, __pos, __n); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_last_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT { _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_last_of(): received nullptr"); return std::__str_find_last_of( @@ -610,25 +615,25 @@ public: } // find_first_not_of - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_first_not_of(basic_string_view __s, size_type __pos = 0) const _NOEXCEPT { return std::__str_find_first_not_of( data(), size(), __s.data(), __pos, __s.size()); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 
_LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_first_not_of(_CharT __c, size_type __pos = 0) const _NOEXCEPT { return std::__str_find_first_not_of(data(), size(), __c, __pos); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_first_not_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") { _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_first_not_of(): received nullptr"); return std::__str_find_first_not_of(data(), size(), __s, __pos, __n); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_first_not_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT { _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_first_not_of(): received nullptr"); return std::__str_find_first_not_of( @@ -636,25 +641,25 @@ public: } // find_last_not_of - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_last_not_of(basic_string_view __s, size_type __pos = npos) const _NOEXCEPT { return std::__str_find_last_not_of( data(), size(), __s.data(), __pos, __s.size()); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_last_not_of(_CharT __c, size_type __pos = npos) const _NOEXCEPT { return std::__str_find_last_not_of(data(), size(), __c, __pos); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_last_not_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT 
_LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") { _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_last_not_of(): received nullptr"); return std::__str_find_last_not_of(data(), size(), __s, __pos, __n); } - _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find_last_not_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT { _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_last_not_of(): received nullptr"); return std::__str_find_last_not_of( @@ -662,37 +667,43 @@ public: } # if _LIBCPP_STD_VER >= 20 - constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(basic_string_view __s) const noexcept { + [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(basic_string_view __s) const noexcept { return size() >= __s.size() && compare(0, __s.size(), __s) == 0; } - constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept { + [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept { return !empty() && _Traits::eq(front(), __c); } - constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept { + [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool + starts_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept { return starts_with(basic_string_view(__s)); } - constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(basic_string_view __s) const noexcept { + [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(basic_string_view __s) const noexcept { return size() >= __s.size() && compare(size() - __s.size(), npos, __s) == 0; } - constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept { + [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept { return !empty() && _Traits::eq(back(), __c); } - constexpr 
_LIBCPP_HIDE_FROM_ABI bool ends_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept { + [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool + ends_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept { return ends_with(basic_string_view(__s)); } # endif # if _LIBCPP_STD_VER >= 23 - constexpr _LIBCPP_HIDE_FROM_ABI bool contains(basic_string_view __sv) const noexcept { return find(__sv) != npos; } + [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(basic_string_view __sv) const noexcept { + return find(__sv) != npos; + } - constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept { return find(__c) != npos; } + [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept { + return find(__c) != npos; + } - constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const { + [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const { return find(__s) != npos; } # endif @@ -897,7 +908,8 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, basic_string_view<_CharT, _Trai // [string.view.hash] template struct __string_view_hash : public __unary_function >, size_t> { - _LIBCPP_HIDE_FROM_ABI size_t operator()(const basic_string_view<_CharT, char_traits<_CharT> > __val) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t + operator()(const basic_string_view<_CharT, char_traits<_CharT> > __val) const _NOEXCEPT { return std::__do_string_hash(__val.data(), __val.data() + __val.size()); } }; @@ -924,30 +936,31 @@ struct hash > > : __string_view_ # if _LIBCPP_STD_VER >= 14 inline namespace literals { inline namespace string_view_literals { -inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view operator""sv(const char* __str, size_t __len) noexcept { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view +operator""sv(const char* __str, size_t __len) noexcept { 
return basic_string_view(__str, __len); } # if _LIBCPP_HAS_WIDE_CHARACTERS -inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view operator""sv(const wchar_t* __str, size_t __len) noexcept { return basic_string_view(__str, __len); } # endif # if _LIBCPP_HAS_CHAR8_T -inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view operator""sv(const char8_t* __str, size_t __len) noexcept { return basic_string_view(__str, __len); } # endif -inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view operator""sv(const char16_t* __str, size_t __len) noexcept { return basic_string_view(__str, __len); } -inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view operator""sv(const char32_t* __str, size_t __len) noexcept { return basic_string_view(__str, __len); } diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp index 0f695d4f1a229..2081e75fdf64b 100644 --- a/libcxx/src/locale.cpp +++ b/libcxx/src/locale.cpp @@ -5557,6 +5557,54 @@ string __num_get<_CharT>::__stage2_int_prep(ios_base& __iob, _CharT* __atoms, _C return __np.grouping(); } +template +int __num_get<_CharT>::__stage2_int_loop( + _CharT __ct, + int __base, + char* __a, + char*& __a_end, + unsigned& __dc, + _CharT __thousands_sep, + const string& __grouping, + unsigned* __g, + unsigned*& __g_end, + _CharT* __atoms) { + if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) { + *__a_end++ = __ct == __atoms[24] ? 
'+' : '-'; + __dc = 0; + return 0; + } + if (__grouping.size() != 0 && __ct == __thousands_sep) { + if (__g_end - __g < __num_get_buf_sz) { + *__g_end++ = __dc; + __dc = 0; + } + return 0; + } + ptrdiff_t __f = __atoms_offset(__atoms, __ct); + if (__f >= 24) + return -1; + switch (__base) { + case 8: + case 10: + if (__f >= __base) + return -1; + break; + case 16: + if (__f < 22) + break; + if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') { + __dc = 0; + *__a_end++ = __src[__f]; + return 0; + } + return -1; + } + *__a_end++ = __src[__f]; + ++__dc; + return 0; +} + template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate; _LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate;) diff --git a/libcxx/test/libcxx/diagnostics/string_view.nodiscard.verify.cpp b/libcxx/test/libcxx/diagnostics/string_view.nodiscard.verify.cpp index e5b2258315fe4..89e4a5b44ab48 100644 --- a/libcxx/test/libcxx/diagnostics/string_view.nodiscard.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/string_view.nodiscard.verify.cpp @@ -12,12 +12,140 @@ #include +#include "type_algorithms.h" #include "test_macros.h" -void test() { - std::string_view string_view; - string_view.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +void test_members() { + std::string_view sv; + + sv.begin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.end(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.cbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.cend(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.rbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.rend(); // expected-warning {{ignoring return value of function declared with 'nodiscard' 
attribute}} + sv.crbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.crend(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + sv.size(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.length(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.max_size(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + sv.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + sv[0]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.at(0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + sv.front(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.back(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.data(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + sv.substr(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} #if TEST_STD_VER >= 26 - string_view.subview(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.subview(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +#endif + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.compare(sv); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.compare(0, 0, sv); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.compare(0, 0, sv, 0, 0); + // expected-warning@+1 {{ignoring return value of 
function declared with 'nodiscard' attribute}} + sv.compare(""); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.compare(0, 0, ""); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.compare(0, 0, "", 0); + + sv.find(sv); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find(' '); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find("", 0, 0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find("", 0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + sv.rfind(sv); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.rfind(' '); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.rfind("", 0, 0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.rfind("", 0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_first_of(sv); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_first_of(' '); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_first_of("", 0, 0); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_first_of("", 0); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_last_of(sv); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_last_of(' '); + // expected-warning@+1 
{{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_last_of("", 0, 0); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_last_of("", 0); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_first_not_of(sv); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_first_not_of(' '); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_first_not_of("", 0, 0); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_first_not_of("", 0); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_last_not_of(sv); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_last_not_of(' '); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_last_not_of("", 0, 0); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.find_last_not_of("", 0); + +#if TEST_STD_VER >= 20 + sv.starts_with(sv); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.starts_with(' '); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.starts_with(""); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + sv.ends_with(sv); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.ends_with(' '); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.ends_with(""); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +#endif + +#if 
TEST_STD_VER >= 23 + sv.contains(sv); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.contains(' '); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sv.contains(""); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +#endif +} + +void test_nonmembers() { + // std::hash<> + + std::hash hash; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + hash(std::string_view{}); + +#if TEST_STD_VER >= 14 + // string_view literals + + using namespace std::string_view_literals; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + ""sv; // const char* +# if !defined(TEST_HAS_NO_WIDE_CHARACTERS) + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + L""sv; // const wchar_t* +# endif +# if !defined(TEST_HAS_NO_CHAR8_T) + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + u8""sv; // const char8_t* +# endif + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + u""sv; // const char16_t* + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + U""sv; // const char32_t* #endif } diff --git a/libcxx/test/libcxx/strings/string.view/nonnull.verify.cpp b/libcxx/test/libcxx/strings/string.view/nonnull.verify.cpp index 316c9828e0de5..ffe048730e687 100644 --- a/libcxx/test/libcxx/strings/string.view/nonnull.verify.cpp +++ b/libcxx/test/libcxx/strings/string.view/nonnull.verify.cpp @@ -10,8 +10,10 @@ // Ensure that APIs which take a CharT* are diagnosing passing a nullptr to them -// Clang 19 and AppleClang don't have diagnose_if with diagnostic flags -// UNSUPPORTED: clang-19, apple-clang-17 +// AppleClang doesn't have diagnose_if with diagnostic flags +// UNSUPPORTED: 
apple-clang-17 + +// ADDITIONAL_COMPILE_FLAGS: -Wno-unused-result #include diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp index 8df89d0ba9206..900ca0e5e1c5e 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp @@ -14,6 +14,11 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build +// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be +// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that +// allows suppression in system headers. +// XFAIL: * + #include // expected-warning@ccomplex:* {{ is deprecated in C++17 and removed in C++20. Include instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp index 32b57033331c8..a1ca842bc62ab 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp @@ -14,6 +14,11 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: clang-modules-build +// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be +// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that +// allows suppression in system headers. +// XFAIL: * + #include // expected-warning@ciso646:* {{ is removed in C++20. 
Include instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp index 23a7709a9d658..503a87658ac02 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp @@ -14,6 +14,11 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build +// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be +// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that +// allows suppression in system headers. +// XFAIL: * + #include // expected-warning@cstdalign:* {{ is deprecated in C++17 and removed in C++20.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp index c2c0f03c52d3c..80025c5ab72d2 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp @@ -14,6 +14,11 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build +// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be +// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that +// allows suppression in system headers. 
+// XFAIL: * + #include // expected-warning@cstdbool:* {{ is deprecated in C++17 and removed in C++20.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp index 4f5564915443d..07bdd29648a68 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp @@ -14,6 +14,11 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build +// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be +// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that +// allows suppression in system headers. +// XFAIL: * + #include // expected-warning@ctgmath:* {{ is deprecated in C++17 and removed in C++20. Include and instead.}} diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp index 015408294bc8c..a110aae2db11b 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp @@ -13,6 +13,8 @@ // iter_type get(iter_type in, iter_type end, ios_base&, // ios_base::iostate& err, long& v) const; +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include @@ -98,6 +100,18 @@ int main(int, char**) assert(err == ios.goodbit); assert(v == 291); } + { + const char str[] = "a123"; + std::dec(ios); + std::ios_base::iostate err = ios.goodbit; + cpp17_input_iterator iter = + f.get(cpp17_input_iterator(str), + cpp17_input_iterator(str+sizeof(str)), + ios, err, v); + assert(base(iter) == str); + assert(err == ios.failbit); + 
assert(v == 0); + } { const char str[] = "0x123"; std::hex(ios); @@ -519,6 +533,142 @@ int main(int, char**) assert(err == ios.failbit); assert(v == std::numeric_limits::max()); } + { + v = -1; + const char str[] = ""; + std::ios_base::iostate err = ios.goodbit; + + cpp17_input_iterator iter = + f.get(cpp17_input_iterator(str), cpp17_input_iterator(str), ios, err, v); + assert(base(iter) == str); + assert(err == (std::ios::eofbit | std::ios::failbit)); + assert(v == 0); + } + { + v = -1; + const char str[] = "+"; + std::ios_base::iostate err = ios.goodbit; + + cpp17_input_iterator iter = + f.get(cpp17_input_iterator(str), cpp17_input_iterator(str + 1), ios, err, v); + assert(base(iter) == str + 1); + assert(err == (std::ios::eofbit | std::ios::failbit)); + assert(v == 0); + } + { + v = -1; + const char str[] = "+"; + std::ios_base::iostate err = ios.goodbit; + + cpp17_input_iterator iter = f.get( + cpp17_input_iterator(std::begin(str)), + cpp17_input_iterator(std::end(str)), + ios, + err, + v); + assert(base(iter) == str + 1); + assert(err == ios.failbit); + assert(v == 0); + } + { + v = -1; + const char str[] = "-"; + std::ios_base::iostate err = ios.goodbit; + + cpp17_input_iterator iter = f.get( + cpp17_input_iterator(std::begin(str)), + cpp17_input_iterator(std::end(str)), + ios, + err, + v); + assert(base(iter) == str + 1); + assert(err == ios.failbit); + assert(v == 0); + } + { + v = -1; + const char str[] = "0"; + std::ios_base::iostate err = ios.goodbit; + + cpp17_input_iterator iter = f.get( + cpp17_input_iterator(std::begin(str)), + cpp17_input_iterator(std::end(str)), + ios, + err, + v); + assert(base(iter) == str + 1); + assert(err == ios.goodbit); + assert(v == 0); + } + { + v = -1; + const char str[] = "078"; + std::ios_base::iostate err = ios.goodbit; + + ios.flags(ios.flags() & ~ios.basefield); + cpp17_input_iterator iter = f.get( + cpp17_input_iterator(std::begin(str)), + cpp17_input_iterator(std::end(str)), + ios, + err, + v); + assert(base(iter) 
== str + 2); + assert(err == ios.goodbit); + assert(v == 7); + ios.flags(ios.flags() | ios.dec); + } + { + v = -1; + std::string str = std::to_string(std::numeric_limits::max()) + "99a"; + std::ios_base::iostate err = ios.goodbit; + + cpp17_input_iterator iter = f.get( + cpp17_input_iterator(str.data()), + cpp17_input_iterator(str.data() + str.size()), + ios, + err, + v); + assert(base(iter) == str.data() + str.size() - 1); + assert(err == ios.failbit); + assert(v == std::numeric_limits::max()); + } + { + std::string str = std::to_string(std::numeric_limits::max()) + 'c'; + std::ios_base::iostate err = ios.goodbit; + cpp17_input_iterator iter = + f.get(cpp17_input_iterator(str.data()), + cpp17_input_iterator(str.data() + str.size()), + ios, err, v); + assert(base(iter) == str.data() + str.size() - 1); + assert(err == ios.goodbit); + assert(v == std::numeric_limits::max()); + } + { + std::string str = std::to_string(static_cast(std::numeric_limits::max()) + 1) + 'c'; + std::ios_base::iostate err = ios.goodbit; + cpp17_input_iterator iter = f.get( + cpp17_input_iterator(str.data()), + cpp17_input_iterator(str.data() + str.size()), + ios, + err, + v); + assert(base(iter) == str.data() + str.size() - 1); + assert(err == ios.failbit); + assert(v == std::numeric_limits::max()); + } + { + std::string str = '-' + std::to_string(static_cast(std::numeric_limits::max()) + 2) + 'c'; + std::ios_base::iostate err = ios.goodbit; + cpp17_input_iterator iter = f.get( + cpp17_input_iterator(str.data()), + cpp17_input_iterator(str.data() + str.size()), + ios, + err, + v); + assert(base(iter) == str.data() + str.size() - 1); + assert(err == ios.failbit); + assert(v == std::numeric_limits::min()); + } return 0; } diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp index 
bee1be08baafc..f9cef08e247d0 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp @@ -68,6 +68,17 @@ int main(int, char**) assert(err == ios.goodbit); assert(v == 1); } + { + const char str[] = "-1"; + std::ios_base::iostate err = ios.goodbit; + cpp17_input_iterator iter = + f.get(cpp17_input_iterator(str), + cpp17_input_iterator(str+sizeof(str)), + ios, err, v); + assert(base(iter) == str+sizeof(str)-1); + assert(err == ios.goodbit); + assert(v == std::numeric_limits::max()); + } std::hex(ios); { const char str[] = "0xFFFFFFFF"; diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp index b087bdcd94017..fed6fc0246d82 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp @@ -68,6 +68,17 @@ int main(int, char**) assert(err == ios.goodbit); assert(v == 1); } + { + const char str[] = "-1"; + std::ios_base::iostate err = ios.goodbit; + cpp17_input_iterator iter = + f.get(cpp17_input_iterator(str), + cpp17_input_iterator(str+sizeof(str)), + ios, err, v); + assert(base(iter) == str+sizeof(str)-1); + assert(err == ios.goodbit); + assert(v == std::numeric_limits::max()); + } std::hex(ios); { const char str[] = "0xFFFFFFFF"; diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp 
b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp index 6769aebe424e3..0bdb6c1c38606 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp @@ -68,6 +68,17 @@ int main(int, char**) assert(err == ios.goodbit); assert(v == 1); } + { + const char str[] = "-1"; + std::ios_base::iostate err = ios.goodbit; + cpp17_input_iterator iter = + f.get(cpp17_input_iterator(str), + cpp17_input_iterator(str+sizeof(str)), + ios, err, v); + assert(base(iter) == str+sizeof(str)-1); + assert(err == ios.goodbit); + assert(v == std::numeric_limits::max()); + } std::hex(ios); { const char str[] = "0xFFFFFFFFFFFFFFFF"; diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp index bec9a7ff9e3bb..decfbe943461f 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp @@ -68,6 +68,17 @@ int main(int, char**) assert(err == ios.goodbit); assert(v == 1); } + { + const char str[] = "-1"; + std::ios_base::iostate err = ios.goodbit; + cpp17_input_iterator iter = + f.get(cpp17_input_iterator(str), + cpp17_input_iterator(str+sizeof(str)), + ios, err, v); + assert(base(iter) == str+sizeof(str)-1); + assert(err == ios.goodbit); + assert(v == std::numeric_limits::max()); + } std::hex(ios); { const char str[] = "0xFFFF"; diff --git 
a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index 8b77a06323e3d..2ac69c38ebffa 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -33,64 +33,63 @@ definitions: - "**/CMakeOutput.log" steps: -# Linaro's ARM builders are temporarily offline. -#- group: ARM -# steps: -# - label: AArch64 -# command: libcxx/utils/ci/run-buildbot aarch64 -# agents: -# queue: libcxx-builders-linaro-arm -# arch: aarch64 -# <<: *common -# -# - label: AArch64 -fno-exceptions -# command: libcxx/utils/ci/run-buildbot aarch64-no-exceptions -# agents: -# queue: libcxx-builders-linaro-arm -# arch: aarch64 -# <<: *common -# -# - label: Armv8 -# command: libcxx/utils/ci/run-buildbot armv8 -# agents: -# queue: libcxx-builders-linaro-arm -# arch: armv8l -# <<: *common -# -# - label: Armv8 -fno-exceptions -# command: libcxx/utils/ci/run-buildbot armv8-no-exceptions -# agents: -# queue: libcxx-builders-linaro-arm -# arch: armv8l -# <<: *common -# -# - label: Armv7 -# command: libcxx/utils/ci/run-buildbot armv7 -# agents: -# queue: libcxx-builders-linaro-arm -# arch: armv8l -# <<: *common -# -# - label: Armv7 -fno-exceptions -# command: libcxx/utils/ci/run-buildbot armv7-no-exceptions -# agents: -# queue: libcxx-builders-linaro-arm -# arch: armv8l -# <<: *common -# -# - label: Armv7-M picolibc -# command: libcxx/utils/ci/run-buildbot armv7m-picolibc -# agents: -# queue: libcxx-builders-linaro-arm -# arch: aarch64 -# <<: *common -# -# - label: Armv7-M picolibc -fno-exceptions -# command: libcxx/utils/ci/run-buildbot armv7m-picolibc-no-exceptions -# agents: -# queue: libcxx-builders-linaro-arm -# arch: aarch64 -# <<: *common +- group: ARM + steps: + - label: AArch64 + command: libcxx/utils/ci/run-buildbot aarch64 + agents: + queue: libcxx-builders-linaro-arm + arch: aarch64 + <<: *common + + - label: AArch64 -fno-exceptions + command: libcxx/utils/ci/run-buildbot aarch64-no-exceptions + agents: + queue: 
libcxx-builders-linaro-arm + arch: aarch64 + <<: *common + + - label: Armv8 + command: libcxx/utils/ci/run-buildbot armv8 + agents: + queue: libcxx-builders-linaro-arm + arch: armv8l + <<: *common + + - label: Armv8 -fno-exceptions + command: libcxx/utils/ci/run-buildbot armv8-no-exceptions + agents: + queue: libcxx-builders-linaro-arm + arch: armv8l + <<: *common + + - label: Armv7 + command: libcxx/utils/ci/run-buildbot armv7 + agents: + queue: libcxx-builders-linaro-arm + arch: armv8l + <<: *common + + - label: Armv7 -fno-exceptions + command: libcxx/utils/ci/run-buildbot armv7-no-exceptions + agents: + queue: libcxx-builders-linaro-arm + arch: armv8l + <<: *common + + - label: Armv7-M picolibc + command: libcxx/utils/ci/run-buildbot armv7m-picolibc + agents: + queue: libcxx-builders-linaro-arm + arch: aarch64 + <<: *common + + - label: Armv7-M picolibc -fno-exceptions + command: libcxx/utils/ci/run-buildbot armv7m-picolibc-no-exceptions + agents: + queue: libcxx-builders-linaro-arm + arch: aarch64 + <<: *common - group: AIX steps: diff --git a/libsycl/Maintainers.md b/libsycl/Maintainers.md new file mode 100644 index 0000000000000..4ffc9e87d3bdd --- /dev/null +++ b/libsycl/Maintainers.md @@ -0,0 +1,13 @@ +# libsycl Maintainers + +This file is a list of the +[maintainers](https://llvm.org/docs/DeveloperPolicy.html#maintainers) for +the SYCL Runtime library. 
+ +# Current Maintainers + +Alexey Bader \ +| alexey.bader@intel.com (email), bader (GitHub, Discord, Discourse) + +Kseniya Tikhomirova \ +| kseniya.tikhomirova@intel.com (email), KseniyaTikhomirova (GitHub, Discourse) \ No newline at end of file diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp index 987586b97dfdc..b6b073a96bcad 100644 --- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp +++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp @@ -63,7 +63,7 @@ static void DumpUnwindRowsToLog(Log *log, AddressRange range, } static void DumpInstToLog(Log *log, Instruction &inst, - InstructionList inst_list) { + const InstructionList &inst_list) { if (!log || !log->GetVerbose()) return; const bool show_address = true; diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index 0b77e0a4929a7..48107717abd31 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -477,18 +477,17 @@ bool AddPythonDLLToSearchPath() { #endif #ifdef LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME -/// Returns whether `python3x.dll` is in the DLL search path. +/// Returns true if `python3x.dll` can be loaded. bool IsPythonDLLInPath() { #define WIDEN2(x) L##x #define WIDEN(x) WIDEN2(x) - WCHAR foundPath[MAX_PATH]; - DWORD result = - SearchPathW(nullptr, WIDEN(LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME), nullptr, - MAX_PATH, foundPath, nullptr); + HMODULE h = LoadLibraryW(WIDEN(LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME)); + if (!h) + return false; + FreeLibrary(h); + return true; #undef WIDEN2 #undef WIDEN - - return result > 0; } #endif diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index b0d4e46bb508f..2f897332f40c9 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -511,6 +511,8 @@ Some subprojects maintain their own list of per-component maintainers. 
[libclc maintainers](https://github.com/llvm/llvm-project/blob/main/libclc/Maintainers.md) +[libsycl maintainers](https://github.com/llvm/llvm-project/blob/main/libsycl/Maintainers.md) + [LLD maintainers](https://github.com/llvm/llvm-project/blob/main/lld/Maintainers.md) [LLDB maintainers](https://github.com/llvm/llvm-project/blob/main/lldb/Maintainers.md) diff --git a/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.rst b/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.rst index 6aec05f29329a..ee12d983dd99e 100644 --- a/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.rst +++ b/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.rst @@ -453,7 +453,7 @@ starts with: return LHS; This code gets the precedence of the current token and checks to see if -if is too low. Because we defined invalid tokens to have a precedence of +it is too low. Because we defined invalid tokens to have a precedence of -1, this check implicitly knows that the pair-stream ends when the token stream runs out of binary operators. If this check succeeds, we know that the token is a binary operator and that it will be included in this diff --git a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h index 5b9535380aebf..d6f5d926f022c 100644 --- a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h @@ -708,7 +708,11 @@ class LLVM_ABI DWARFLinker : public DWARFLinkerBase { /// already there. /// \returns is a name was found. 
bool getDIENames(const DWARFDie &Die, AttributesInfo &Info, - OffsetsStringPool &StringPool, bool StripTemplate = false); + OffsetsStringPool &StringPool, const DWARFFile &File, + CompileUnit &Unit, bool StripTemplate = false); + + llvm::StringRef getCanonicalDIEName(DWARFDie Die, const DWARFFile &File, + CompileUnit *Unit); uint32_t hashFullyQualifiedName(DWARFDie DIE, CompileUnit &U, const DWARFFile &File, diff --git a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h index 9fb1b3f80e2ff..5ced6d05cc231 100644 --- a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h @@ -84,11 +84,13 @@ class DeclContext { DeclContext() : DefinedInClangModule(0), Parent(*this) {} DeclContext(unsigned Hash, uint32_t Line, uint32_t ByteSize, uint16_t Tag, - StringRef Name, StringRef File, const DeclContext &Parent, - DWARFDie LastSeenDIE = DWARFDie(), unsigned CUId = 0) + StringRef Name, StringRef NameForUniquing, StringRef File, + const DeclContext &Parent, DWARFDie LastSeenDIE = DWARFDie(), + unsigned CUId = 0) : QualifiedNameHash(Hash), Line(Line), ByteSize(ByteSize), Tag(Tag), - DefinedInClangModule(0), Name(Name), File(File), Parent(Parent), - LastSeenDIE(LastSeenDIE), LastSeenCompileUnitID(CUId) {} + DefinedInClangModule(0), Name(Name), NameForUniquing(NameForUniquing), + File(File), Parent(Parent), LastSeenDIE(LastSeenDIE), + LastSeenCompileUnitID(CUId) {} uint32_t getQualifiedNameHash() const { return QualifiedNameHash; } @@ -100,6 +102,7 @@ class DeclContext { uint32_t getCanonicalDIEOffset() const { return CanonicalDIEOffset; } void setCanonicalDIEOffset(uint32_t Offset) { CanonicalDIEOffset = Offset; } + llvm::StringRef getCanonicalName() const { return Name; } bool isDefinedInClangModule() const { return DefinedInClangModule; } void setDefinedInClangModule(bool Val) { DefinedInClangModule = Val; } @@ -115,6 +118,7 
@@ class DeclContext { uint16_t Tag = dwarf::DW_TAG_compile_unit; unsigned DefinedInClangModule : 1; StringRef Name; + StringRef NameForUniquing; StringRef File; const DeclContext &Parent; DWARFDie LastSeenDIE; @@ -180,7 +184,7 @@ struct DeclMapInfo : private DenseMapInfo { return RHS == LHS; return LHS->QualifiedNameHash == RHS->QualifiedNameHash && LHS->Line == RHS->Line && LHS->ByteSize == RHS->ByteSize && - LHS->Name.data() == RHS->Name.data() && + LHS->NameForUniquing.data() == RHS->NameForUniquing.data() && LHS->File.data() == RHS->File.data() && LHS->Parent.QualifiedNameHash == RHS->Parent.QualifiedNameHash; } diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index e624088a0964e..ce933c5f1be85 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -307,7 +307,13 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); + /// + /// If \p SubregToRegSrcInsts is not empty, we are coalescing a + /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an + /// implicit-def of DstReg on instructions that define SrcReg. + void updateRegDefsUses( + Register SrcReg, Register DstReg, unsigned SubIdx, + SmallPtrSetImpl *SubregToRegSrcInsts = nullptr); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1444,6 +1450,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, // CopyMI may have implicit operands, save them so that we can transfer them // over to the newly materialized instruction after CopyMI is removed. 
+ LaneBitmask NewMIImplicitOpsMask; SmallVector ImplicitOps; ImplicitOps.reserve(CopyMI->getNumOperands() - CopyMI->getDesc().getNumOperands()); @@ -1458,6 +1465,9 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) && "unexpected implicit virtual register def"); ImplicitOps.push_back(MO); + if (MO.isDef() && MO.getReg().isVirtual() && + MRI->shouldTrackSubRegLiveness(DstReg)) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } @@ -1494,14 +1504,11 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, } else { assert(MO.getReg() == NewMI.getOperand(0).getReg()); - // We're only expecting another def of the main output, so the range - // should get updated with the regular output range. - // - // FIXME: The range updating below probably needs updating to look at - // the super register if subranges are tracked. - assert(!MRI->shouldTrackSubRegLiveness(DstReg) && - "subrange update for implicit-def of super register may not be " - "properly handled"); + // If lanemasks need to be tracked, compile the lanemask of the NewMI + // implicit def operands to avoid subranges for the super-regs from + // being removed by code later on in this function. 
+ if (MRI->shouldTrackSubRegLiveness(MO.getReg())) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } } @@ -1617,7 +1624,8 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, *LIS->getSlotIndexes(), *TRI); for (LiveInterval::SubRange &SR : DstInt.subranges()) { - if ((SR.LaneMask & DstMask).none()) { + if ((SR.LaneMask & DstMask).none() && + (SR.LaneMask & NewMIImplicitOpsMask).none()) { LLVM_DEBUG(dbgs() << "Removing undefined SubRange " << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); @@ -1891,11 +1899,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } } -void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, - unsigned SubIdx) { +void RegisterCoalescer::updateRegDefsUses( + Register SrcReg, Register DstReg, unsigned SubIdx, + SmallPtrSetImpl *SubregToRegSrcInsts) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); + // Coalescing a COPY may expose reads of 'undef' subregisters. + // If so, then explicitly propagate 'undef' to those operands. if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { for (MachineOperand &MO : MRI->reg_operands(DstReg)) { if (MO.isUndef()) @@ -1912,6 +1923,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, } } + // If DstInt already has a subrange for the unused lanes, then we shouldn't + // create duplicate subranges when we update the interval for unused lanes. + LaneBitmask DstIntLaneMask; + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + for (LiveInterval::SubRange &SR : DstInt->subranges()) + DstIntLaneMask |= SR.LaneMask; + } + + // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'. 
SmallPtrSet Visited; for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); @@ -1935,6 +1955,82 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); + bool RequiresImplicitRedef = false; + if (SubregToRegSrcInsts && !SubregToRegSrcInsts->empty()) { + // We can only add an implicit-def and undef if the sub registers match, + // e.g. + // %0:gr32 = INSTX + // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined + // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32 + // + // This cannot be transformed into: + // %1.sub32:gr64 = INSTX + // undef %1.sub8:gr64 = INSTY , implicit-def %1 + // + // because the undef means that none of the bits of %1 are read, thus + // thrashing the top 24 bits of %1.sub32. + if (SubregToRegSrcInsts->contains(UseMI) && + all_of(UseMI->all_defs(), + [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool { + if (MO.getReg() != SrcReg) // Ignore unrelated registers + return true; + return MO.isUndef() || + (SubIdx && + (!MO.getSubReg() || SubIdx == MO.getSubReg())); + })) { + // Add implicit-def of super-register to express that the whole + // register is defined by the instruction. + UseMI->addRegisterDefined(DstReg); + RequiresImplicitRedef = true; + } + + // If the coalesed instruction doesn't fully define the register, we need + // to preserve the original super register liveness for SUBREG_TO_REG. + // + // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, + // but it introduces liveness for other subregisters. Downstream users may + // have been relying on those bits, so we need to ensure their liveness is + // captured with a def of other lanes. + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + // First check if there is sufficient granularity in terms of subranges. 
+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask UnusedLanes = DstMask & ~UsedLanes; + if ((UnusedLanes & ~DstIntLaneMask).any()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt); + DstIntLaneMask |= UnusedLanes; + } + + // After duplicating the live ranges for the low/hi bits, we + // need to update the subranges of the DstReg interval such that + // for a case like this: + // + // entry: + // 16B %1:gpr32 = INSTRUCTION (<=> UseMI) + // : + // if.then: + // 32B %1:gpr32 = MOVIMM32 .. + // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32 + // + // Only the MOVIMM32 requires a def of the top lanes and any intervals + // for the top 32-bits of the def at 16B should be removed. + for (LiveInterval::SubRange &SR : DstInt->subranges()) { + if (!Writes || RequiresImplicitRedef || + (SR.LaneMask & UnusedLanes).none()) + continue; + + assert((SR.LaneMask & UnusedLanes) == SR.LaneMask && + "Unexpected lanemask. Subrange needs finer granularity"); + + SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(); + auto SegmentI = SR.find(UseIdx); + if (SegmentI != SR.end()) + SR.removeSegment(SegmentI, true); + } + } + } + // Replace SrcReg with DstReg in all UseMI operands. for (unsigned Op : Ops) { MachineOperand &MO = UseMI->getOperand(Op); @@ -1943,7 +2039,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // turn a full def into a read-modify-write sub-register def and vice // versa. if (SubIdx && MO.isDef()) - MO.setIsUndef(!Reads); + MO.setIsUndef(!Reads || RequiresImplicitRedef); // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. 
@@ -2046,6 +2142,38 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, LIS->shrinkToUses(&LI); } +/// For a given use of value \p Idx, it returns the def in the current block, +/// or otherwise all possible defs in preceding blocks. +static bool findPrecedingDefs(SmallPtrSetImpl &Instrs, + LiveIntervals *LIS, LiveInterval &SrcInt, + MachineBasicBlock *MBB, VNInfo *Idx) { + auto IsPrecedingDef = [&](VNInfo *Idx) -> bool { + if (Idx->isPHIDef()) + return false; + MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def); + assert(Def && "Unable to find a def for SUBREG_TO_REG source operand"); + Instrs.insert(Def); + return true; + }; + + if (IsPrecedingDef(Idx)) + return true; + + SmallVector Worklist(MBB->pred_begin(), MBB->pred_end()); + SmallPtrSet VisitedBlocks; + while (!Worklist.empty()) { + MachineBasicBlock *MBB = Worklist.pop_back_val(); + auto [_, Inserted] = VisitedBlocks.insert(MBB); + if (!Inserted) + continue; + VNInfo *Idx = SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(MBB)); + if (!IsPrecedingDef(Idx)) + Worklist.append(MBB->pred_begin(), MBB->pred_end()); + } + + return !Instrs.empty(); +} + bool RegisterCoalescer::joinCopy( MachineInstr *CopyMI, bool &Again, SmallPtrSetImpl &CurrentErasedInstrs) { @@ -2183,6 +2311,34 @@ bool RegisterCoalescer::joinCopy( }); } + SmallPtrSet SubregToRegSrcInsts; + Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); + if (CopyMI->isSubregToReg() && !SrcReg.isPhysical()) { + // For the case where the copy instruction is a SUBREG_TO_REG, e.g. + // + // %0:gpr32 = movimm32 .. + // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32 + // ... + // %0:gpr32 = COPY + // + // After joining liveranges, the original `movimm32` will need an + // implicit-def to make it explicit that the entire register is written, + // i.e. + // + // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0 + // ... 
+ // undef %0.sub32:gpr64 = COPY // Note that this does not + // // require an implicit-def, + // // because it has nothing to + // // do with the SUBREG_TO_REG. + LiveInterval &SrcInt = LIS->getInterval(SrcReg); + SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI); + if (!findPrecedingDefs(SubregToRegSrcInsts, LIS, SrcInt, + CopyMI->getParent(), + SrcInt.Query(SubregToRegSlotIdx).valueIn())) + llvm_unreachable("SUBREG_TO_REG src requires a def"); + } + ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2253,7 +2409,8 @@ bool RegisterCoalescer::joinCopy( // Also update DstReg operands to include DstIdx if it is set. if (CP.getDstIdx()) updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), + &SubregToRegSrcInsts); // Shrink subregister ranges if necessary. if (ShrinkMask.any()) { diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 8ec4bfbb5a330..cf064b90a7d34 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -448,7 +448,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) { const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def); assert(DefMI != nullptr); LaneBitmask LM; - for (const MachineOperand &DefOp : DefMI->defs()) { + for (const MachineOperand &DefOp : DefMI->all_defs()) { Register R = DefOp.getReg(); if (R != LI.reg()) continue; diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp index 8637b55c78f9c..daf3788639451 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp @@ -151,22 +151,84 @@ static bool isTypeTag(uint16_t Tag) { return false; } -bool DWARFLinker::DIECloner::getDIENames(const DWARFDie &Die, - AttributesInfo &Info, - OffsetsStringPool &StringPool, - bool StripTemplate) { 
+/// Recurse through the input DIE's canonical references until we find a +/// DW_AT_name. +llvm::StringRef +DWARFLinker::DIECloner::getCanonicalDIEName(DWARFDie Die, const DWARFFile &File, + CompileUnit *Unit) { + if (!Die) + return {}; + + std::optional Ref; + + auto GetDieName = [](const DWARFDie &D) -> llvm::StringRef { + auto NameForm = D.find(llvm::dwarf::DW_AT_name); + if (!NameForm) + return {}; + + auto NameOrErr = NameForm->getAsCString(); + if (!NameOrErr) { + llvm::consumeError(NameOrErr.takeError()); + return {}; + } + + return *NameOrErr; + }; + + llvm::StringRef Name = GetDieName(Die); + if (!Name.empty()) + return Name; + + while (true) { + if (!(Ref = Die.find(llvm::dwarf::DW_AT_specification)) && + !(Ref = Die.find(llvm::dwarf::DW_AT_abstract_origin))) + break; + + Die = Linker.resolveDIEReference(File, CompileUnits, *Ref, Die, Unit); + if (!Die) + break; + + assert(Unit); + + unsigned SpecIdx = Unit->getOrigUnit().getDIEIndex(Die); + CompileUnit::DIEInfo &SpecInfo = Unit->getInfo(SpecIdx); + if (SpecInfo.Ctxt && SpecInfo.Ctxt->hasCanonicalDIE()) { + if (!SpecInfo.Ctxt->getCanonicalName().empty()) { + Name = SpecInfo.Ctxt->getCanonicalName(); + break; + } + } + + Name = GetDieName(Die); + if (!Name.empty()) + break; + } + + return Name; +} + +bool DWARFLinker::DIECloner::getDIENames( + const DWARFDie &Die, AttributesInfo &Info, OffsetsStringPool &StringPool, + const DWARFFile &File, CompileUnit &Unit, bool StripTemplate) { // This function will be called on DIEs having low_pcs and // ranges. As getting the name might be more expansive, filter out // blocks directly. if (Die.getTag() == dwarf::DW_TAG_lexical_block) return false; + // The mangled name of an specification DIE will by virtue of the + // uniquing algorithm be the same as the one it got uniqued into. + // So just use the input DIE's linkage name. 
if (!Info.MangledName) if (const char *MangledName = Die.getLinkageName()) Info.MangledName = StringPool.getEntry(MangledName); + // For subprograms with linkage names, we unique on the linkage name, + // so DW_AT_name's may differ between the input and canonical DIEs. + // Use the name of the canonical DIE. if (!Info.Name) - if (const char *Name = Die.getShortName()) + if (llvm::StringRef Name = getCanonicalDIEName(Die, File, &Unit); + !Name.empty()) Info.Name = StringPool.getEntry(Name); if (!Info.MangledName) @@ -1939,7 +2001,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, // accelerator tables too. For now stick with dsymutil's behavior. if ((Info.InDebugMap || AttrInfo.HasLowPc || AttrInfo.HasRanges) && Tag != dwarf::DW_TAG_compile_unit && - getDIENames(InputDIE, AttrInfo, DebugStrPool, + getDIENames(InputDIE, AttrInfo, DebugStrPool, File, Unit, Tag != dwarf::DW_TAG_inlined_subroutine)) { if (AttrInfo.MangledName && AttrInfo.MangledName != AttrInfo.Name) Unit.addNameAccelerator(Die, AttrInfo.MangledName, @@ -1962,7 +2024,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, } else if (Tag == dwarf::DW_TAG_imported_declaration && AttrInfo.Name) { Unit.addNamespaceAccelerator(Die, AttrInfo.Name); } else if (isTypeTag(Tag) && !AttrInfo.IsDeclaration) { - bool Success = getDIENames(InputDIE, AttrInfo, DebugStrPool); + bool Success = getDIENames(InputDIE, AttrInfo, DebugStrPool, File, Unit); uint64_t RuntimeLang = dwarf::toUnsigned(InputDIE.find(dwarf::DW_AT_APPLE_runtime_class)) .value_or(0); diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp index c9c8dddce9c44..66a1ba9c6711f 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp @@ -84,24 +84,26 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE, break; } - StringRef NameRef; + StringRef Name = 
DIE.getShortName(); + StringRef NameForUniquing; StringRef FileRef; if (const char *LinkageName = DIE.getLinkageName()) - NameRef = StringPool.internString(LinkageName); - else if (const char *ShortName = DIE.getShortName()) - NameRef = StringPool.internString(ShortName); + NameForUniquing = StringPool.internString(LinkageName); + else if (!Name.empty()) + NameForUniquing = StringPool.internString(Name); - bool IsAnonymousNamespace = NameRef.empty() && Tag == dwarf::DW_TAG_namespace; + bool IsAnonymousNamespace = + NameForUniquing.empty() && Tag == dwarf::DW_TAG_namespace; if (IsAnonymousNamespace) { // FIXME: For dsymutil-classic compatibility. I think uniquing within // anonymous namespaces is wrong. There is no ODR guarantee there. - NameRef = "(anonymous namespace)"; + NameForUniquing = "(anonymous namespace)"; } if (Tag != dwarf::DW_TAG_class_type && Tag != dwarf::DW_TAG_structure_type && Tag != dwarf::DW_TAG_union_type && - Tag != dwarf::DW_TAG_enumeration_type && NameRef.empty()) + Tag != dwarf::DW_TAG_enumeration_type && NameForUniquing.empty()) return PointerIntPair(nullptr); unsigned Line = 0; @@ -140,10 +142,10 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE, } } - if (!Line && NameRef.empty()) + if (!Line && NameForUniquing.empty()) return PointerIntPair(nullptr); - // We hash NameRef, which is the mangled name, in order to get most + // We hash NameForUniquing, which is the mangled name, in order to get most // overloaded functions resolve correctly. // // Strictly speaking, hashing the Tag is only necessary for a @@ -153,7 +155,8 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE, // FIXME: dsymutil-classic won't unique the same type presented // once as a struct and once as a class. Using the Tag in the fully // qualified name hash to get the same effect. 
- unsigned Hash = hash_combine(Context.getQualifiedNameHash(), Tag, NameRef); + unsigned Hash = + hash_combine(Context.getQualifiedNameHash(), Tag, NameForUniquing); // FIXME: dsymutil-classic compatibility: when we don't have a name, // use the filename. @@ -161,15 +164,16 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE, Hash = hash_combine(Hash, FileRef); // Now look if this context already exists. - DeclContext Key(Hash, Line, ByteSize, Tag, NameRef, FileRef, Context); + DeclContext Key(Hash, Line, ByteSize, Tag, Name, NameForUniquing, FileRef, + Context); auto ContextIter = Contexts.find(&Key); if (ContextIter == Contexts.end()) { // The context wasn't found. bool Inserted; - DeclContext *NewContext = - new (Allocator) DeclContext(Hash, Line, ByteSize, Tag, NameRef, FileRef, - Context, DIE, U.getUniqueID()); + DeclContext *NewContext = new (Allocator) + DeclContext(Hash, Line, ByteSize, Tag, Name, NameForUniquing, FileRef, + Context, DIE, U.getUniqueID()); std::tie(ContextIter, Inserted) = Contexts.insert(NewContext); assert(Inserted && "Failed to insert DeclContext"); (void)Inserted; diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp index 8967a2eb1749e..49674b4c32de0 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -87,6 +87,19 @@ Error NativeSession::createFromPdb(std::unique_ptr Buffer, return Error::success(); } +static Error validatePdbMagic(StringRef PdbPath) { + file_magic Magic; + if (auto EC = identify_magic(PdbPath, Magic)) + return make_error(EC); + + if (Magic != file_magic::pdb) + return make_error( + raw_error_code::invalid_format, + "The input file did not contain the pdb file magic."); + + return Error::success(); +} + static Expected> loadPdbFile(StringRef PdbPath, std::unique_ptr &Allocator) { ErrorOr> ErrorOrBuffer = @@ -97,10 +110,8 @@ loadPdbFile(StringRef PdbPath, 
std::unique_ptr &Allocator) { std::unique_ptr Buffer = std::move(*ErrorOrBuffer); PdbPath = Buffer->getBufferIdentifier(); - file_magic Magic; - auto EC = identify_magic(PdbPath, Magic); - if (EC || Magic != file_magic::pdb) - return make_error(EC); + if (auto EC = validatePdbMagic(PdbPath)) + return std::move(EC); auto Stream = std::make_unique( std::move(Buffer), llvm::endianness::little); @@ -152,10 +163,8 @@ Error NativeSession::createFromExe(StringRef ExePath, if (!PdbPath) return PdbPath.takeError(); - file_magic Magic; - auto EC = identify_magic(PdbPath.get(), Magic); - if (EC || Magic != file_magic::pdb) - return make_error(EC); + if (auto EC = validatePdbMagic(PdbPath.get())) + return EC; auto Allocator = std::make_unique(); auto File = loadPdbFile(PdbPath.get(), Allocator); diff --git a/llvm/lib/Support/AllocToken.cpp b/llvm/lib/Support/AllocToken.cpp index 8e9e89f0df353..daa40d4e9dcc6 100644 --- a/llvm/lib/Support/AllocToken.cpp +++ b/llvm/lib/Support/AllocToken.cpp @@ -24,6 +24,7 @@ llvm::getAllocTokenModeFromString(StringRef Name) { .Case("random", AllocTokenMode::Random) .Case("typehash", AllocTokenMode::TypeHash) .Case("typehashpointersplit", AllocTokenMode::TypeHashPointerSplit) + .Case("default", DefaultAllocTokenMode) .Default(std::nullopt); } diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 73d9699f71477..5da6181ba36dd 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -162,8 +162,7 @@ class AArch64AsmPrinter : public AsmPrinter { Register ScratchReg, AArch64PACKey::ID Key, AArch64PAuth::AuthCheckMethod Method, - bool ShouldTrap, - const MCSymbol *OnFailure); + const MCSymbol *OnFailure = nullptr); // Check authenticated LR before tail calling. 
void emitPtrauthTailCallHardening(const MachineInstr *TC); @@ -1937,14 +1936,19 @@ Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, return ScratchReg; } -/// Emits a code sequence to check an authenticated pointer value. +/// Emit a code sequence to check an authenticated pointer value. /// -/// If OnFailure argument is passed, jump there on check failure instead -/// of proceeding to the next instruction (only if ShouldTrap is false). +/// This function emits a sequence of instructions that checks if TestedReg was +/// authenticated successfully. On success, execution continues at the next +/// instruction after the sequence. +/// +/// The action performed on failure depends on the OnFailure argument: +/// * if OnFailure is not nullptr, control is transferred to that label after +/// clearing the PAC field +/// * otherwise, BRK instruction is emitted to generate an error void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue( Register TestedReg, Register ScratchReg, AArch64PACKey::ID Key, - AArch64PAuth::AuthCheckMethod Method, bool ShouldTrap, - const MCSymbol *OnFailure) { + AArch64PAuth::AuthCheckMethod Method, const MCSymbol *OnFailure) { // Insert a sequence to check if authentication of TestedReg succeeded, // such as: // @@ -1981,7 +1985,7 @@ void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue( .addReg(getWRegFromXReg(ScratchReg)) .addReg(TestedReg) .addImm(0)); - assert(ShouldTrap && !OnFailure && "DummyLoad always traps on error"); + assert(!OnFailure && "DummyLoad always traps on error"); return; } @@ -2035,15 +2039,14 @@ void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue( llvm_unreachable("Unsupported check method"); } - if (ShouldTrap) { - assert(!OnFailure && "Cannot specify OnFailure with ShouldTrap"); + if (!OnFailure) { // Trapping sequences do a 'brk'. 
// brk #<0xc470 + aut key> EmitToStreamer(MCInstBuilder(AArch64::BRK).addImm(0xc470 | Key)); } else { // Non-trapping checked sequences return the stripped result in TestedReg, - // skipping over success-only code (such as re-signing the pointer) if - // there is one. + // skipping over success-only code (such as re-signing the pointer) by + // jumping to OnFailure label. // Note that this can introduce an authentication oracle (such as based on // the high bits of the re-signed value). @@ -2068,12 +2071,9 @@ void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue( MCInstBuilder(XPACOpc).addReg(TestedReg).addReg(TestedReg)); } - if (OnFailure) { - // b Lend - EmitToStreamer( - MCInstBuilder(AArch64::B) - .addExpr(MCSymbolRefExpr::create(OnFailure, OutContext))); - } + // b Lend + const auto *OnFailureExpr = MCSymbolRefExpr::create(OnFailure, OutContext); + EmitToStreamer(MCInstBuilder(AArch64::B).addExpr(OnFailureExpr)); } // If the auth check succeeds, we can continue. @@ -2100,9 +2100,8 @@ void AArch64AsmPrinter::emitPtrauthTailCallHardening(const MachineInstr *TC) { "Neither x16 nor x17 is available as a scratch register"); AArch64PACKey::ID Key = AArch64FI->shouldSignWithBKey() ? AArch64PACKey::IB : AArch64PACKey::IA; - emitPtrauthCheckAuthenticatedValue( - AArch64::LR, ScratchReg, Key, LRCheckMethod, - /*ShouldTrap=*/true, /*OnFailure=*/nullptr); + emitPtrauthCheckAuthenticatedValue(AArch64::LR, ScratchReg, Key, + LRCheckMethod); } void AArch64AsmPrinter::emitPtrauthAuthResign( @@ -2176,9 +2175,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign( if (IsAUTPAC && !ShouldTrap) EndSym = createTempSymbol("resign_end_"); - emitPtrauthCheckAuthenticatedValue(AUTVal, Scratch, AUTKey, - AArch64PAuth::AuthCheckMethod::XPAC, - ShouldTrap, EndSym); + emitPtrauthCheckAuthenticatedValue( + AUTVal, Scratch, AUTKey, AArch64PAuth::AuthCheckMethod::XPAC, EndSym); } // We already emitted unchecked and checked-but-non-trapping AUTs. 
@@ -2517,9 +2515,7 @@ void AArch64AsmPrinter::LowerMOVaddrPAC(const MachineInstr &MI) { : AArch64PACKey::DA); emitPtrauthCheckAuthenticatedValue(AArch64::X16, AArch64::X17, AuthKey, - AArch64PAuth::AuthCheckMethod::XPAC, - /*ShouldTrap=*/true, - /*OnFailure=*/nullptr); + AArch64PAuth::AuthCheckMethod::XPAC); } } else { EmitToStreamer(MCInstBuilder(AArch64::LDRXui) @@ -2652,9 +2648,7 @@ void AArch64AsmPrinter::LowerLOADgotAUTH(const MachineInstr &MI) { (AuthOpcode == AArch64::AUTIA ? AArch64PACKey::IA : AArch64PACKey::DA); emitPtrauthCheckAuthenticatedValue(AuthResultReg, AArch64::X17, AuthKey, - AArch64PAuth::AuthCheckMethod::XPAC, - /*ShouldTrap=*/true, - /*OnFailure=*/nullptr); + AArch64PAuth::AuthCheckMethod::XPAC); emitMovXReg(DstReg, AuthResultReg); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 1765d054a3c0d..123fc5bf37a19 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -437,6 +437,13 @@ std::pair RegBankLegalizeHelper::unpackAExt(Register Reg) { return {Lo.getReg(0), Hi.getReg(0)}; } +std::pair +RegBankLegalizeHelper::unpackAExtTruncS16(Register Reg) { + auto [Lo32, Hi32] = unpackAExt(Reg); + return {B.buildTrunc(SgprRB_S16, Lo32).getReg(0), + B.buildTrunc(SgprRB_S16, Hi32).getReg(0)}; +} + void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) { Register Lo, Hi; switch (MI.getOpcode()) { @@ -629,14 +636,21 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) { void RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); assert(MRI.getType(Dst) == V2S16); - auto [Op1Lo32, Op1Hi32] = unpackAExt(MI.getOperand(1).getReg()); - auto [Op2Lo32, Op2Hi32] = unpackAExt(MI.getOperand(2).getReg()); unsigned Opc = MI.getOpcode(); auto Flags = MI.getFlags(); - auto Op1Lo = B.buildTrunc(SgprRB_S16, Op1Lo32); - auto Op1Hi = 
B.buildTrunc(SgprRB_S16, Op1Hi32); - auto Op2Lo = B.buildTrunc(SgprRB_S16, Op2Lo32); - auto Op2Hi = B.buildTrunc(SgprRB_S16, Op2Hi32); + + if (MI.getNumOperands() == 2) { + auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg()); + auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo}, Flags); + auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi}, Flags); + B.buildMergeLikeInstr(Dst, {Lo, Hi}); + MI.eraseFromParent(); + return; + } + + assert(MI.getNumOperands() == 3); + auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg()); + auto [Op2Lo, Op2Hi] = unpackAExtTruncS16(MI.getOperand(2).getReg()); auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo}, Flags); auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi}, Flags); B.buildMergeLikeInstr(Dst, {Lo, Hi}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index e7598f888e4b5..4f1c3c02fa5d6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -118,6 +118,7 @@ class RegBankLegalizeHelper { std::pair unpackZExt(Register Reg); std::pair unpackSExt(Register Reg); std::pair unpackAExt(Register Reg); + std::pair unpackAExtTruncS16(Register Reg); void lowerUnpackBitShift(MachineInstr &MI); void lowerV_BFE(MachineInstr &MI); void lowerS_BFE(MachineInstr &MI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 097457f9f0deb..6ec51e1be8aca 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -955,6 +955,25 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}}) .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}}); + // FNEG and FABS are either folded as source modifiers or can be selected as + // bitwise XOR and AND with 
Mask. XOR and AND are available on SALU but for + // targets without SALU float we still select them as VGPR since there would + // be no real sgpr use. + addRulesForGOpcs({G_FNEG, G_FABS}, Standard) + .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat) + .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat) + .Div(S16, {{Vgpr16}, {Vgpr16}}) + .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat) + .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat) + .Div(S32, {{Vgpr32}, {Vgpr32}}) + .Uni(S64, {{UniInVgprS64}, {Vgpr64}}) + .Div(S64, {{Vgpr64}, {Vgpr64}}) + .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat) + .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat) + .Div(V2S16, {{VgprV2S16}, {VgprV2S16}}) + .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}}) + .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}}); + addRulesForGOpcs({G_FPTOUI}) .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat) .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 8ef5874d7baf9..da287e0243d71 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -12,6 +12,7 @@ #include "SIDefines.h" #include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -1341,12 +1342,9 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI, return; O << Name; - for (int I = 0; I < NumOps; ++I) { - if (I != 0) - O << ','; - - O << !!(Ops[I] & Mod); - } + ListSeparator Sep(","); + for (int I = 0; I < NumOps; ++I) + O << Sep << !!(Ops[I] & Mod); if (HasDstSel) { O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL); @@ -1584,14 +1582,10 @@ void AMDGPUInstPrinter::printGPRIdxMode(const MCInst *MI, unsigned OpNo, O << 
formatHex(static_cast(Val)); } else { O << "gpr_idx("; - bool NeedComma = false; + ListSeparator Sep(","); for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { - if (Val & (1 << ModeId)) { - if (NeedComma) - O << ','; - O << IdSymbolic[ModeId]; - NeedComma = true; - } + if (Val & (1 << ModeId)) + O << Sep << IdSymbolic[ModeId]; } O << ')'; } @@ -1798,25 +1792,16 @@ void AMDGPUInstPrinter::printSWaitCnt(const MCInst *MI, unsigned OpNo, bool IsDefaultLgkmcnt = Lgkmcnt == getLgkmcntBitMask(ISA); bool PrintAll = IsDefaultVmcnt && IsDefaultExpcnt && IsDefaultLgkmcnt; - bool NeedSpace = false; + ListSeparator Sep(" "); - if (!IsDefaultVmcnt || PrintAll) { - O << "vmcnt(" << Vmcnt << ')'; - NeedSpace = true; - } + if (!IsDefaultVmcnt || PrintAll) + O << Sep << "vmcnt(" << Vmcnt << ')'; - if (!IsDefaultExpcnt || PrintAll) { - if (NeedSpace) - O << ' '; - O << "expcnt(" << Expcnt << ')'; - NeedSpace = true; - } + if (!IsDefaultExpcnt || PrintAll) + O << Sep << "expcnt(" << Expcnt << ')'; - if (!IsDefaultLgkmcnt || PrintAll) { - if (NeedSpace) - O << ' '; - O << "lgkmcnt(" << Lgkmcnt << ')'; - } + if (!IsDefaultLgkmcnt || PrintAll) + O << Sep << "lgkmcnt(" << Lgkmcnt << ')'; } void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo, @@ -1832,14 +1817,10 @@ void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo, StringRef Name; unsigned Val; bool IsDefault; - bool NeedSpace = false; + ListSeparator Sep(" "); while (decodeDepCtr(Imm16, Id, Name, Val, IsDefault, STI)) { - if (!IsDefault || !HasNonDefaultVal) { - if (NeedSpace) - O << ' '; - O << Name << '(' << Val << ')'; - NeedSpace = true; - } + if (!IsDefault || !HasNonDefaultVal) + O << Sep << Name << '(' << Val << ')'; } } else { O << formatHex(Imm16); diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td index a7aa6274f5ac1..436b7eef600e7 100644 --- a/llvm/lib/Target/BPF/BPF.td +++ b/llvm/lib/Target/BPF/BPF.td @@ -31,6 +31,10 @@ def MisalignedMemAccess : 
SubtargetFeature<"allows-misaligned-mem-access", "AllowsMisalignedMemAccess", "true", "Allows misaligned memory access">; +def AllowBuiltinCall : SubtargetFeature<"allow-builtin-calls", + "AllowBuiltinCalls", "true", + "Allow calls to builtin functions">; + def : Proc<"generic", []>; def : Proc<"v1", []>; def : Proc<"v2", []>; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index a8d1faa85116b..4485c41b4c0fa 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -208,6 +208,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, HasMovsx = STI.hasMovsx(); AllowsMisalignedMemAccess = STI.getAllowsMisalignedMemAccess(); + AllowBuiltinCalls = STI.getAllowBuiltinCalls(); } bool BPFTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align, @@ -567,9 +568,10 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) { if (StringRef(E->getSymbol()) != BPF_TRAP) { Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); - fail(CLI.DL, DAG, - Twine("A call to built-in function '" + StringRef(E->getSymbol()) + - "' is not supported.")); + if (!AllowBuiltinCalls) + fail(CLI.DL, DAG, + Twine("A call to built-in function '" + StringRef(E->getSymbol()) + + "' is not supported.")); } } @@ -1196,3 +1198,18 @@ bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL, return true; } + +bool BPFTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty, + bool IsSigned) const { + return IsSigned || Ty->isIntegerTy(32); +} + +bool BPFTargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context, + const Type *RetTy) const { + // At minimal return Outs.size() <= 1, or check valid types in CC. 
+ SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64); +} \ No newline at end of file diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h index 8607e4f8c9e69..a5036e31cb61d 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -68,6 +68,8 @@ class BPFTargetLowering : public TargetLowering { // Allows Misalignment bool AllowsMisalignedMemAccess; + bool AllowBuiltinCalls; + SDValue LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; @@ -163,6 +165,14 @@ class BPFTargetLowering : public TargetLowering { MachineBasicBlock * EmitInstrWithCustomInserterLDimm64(MachineInstr &MI, MachineBasicBlock *BB) const; + + // Returns true if arguments should be sign-extended in lib calls. + bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override; + + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool IsVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context, const Type *RetTy) const override; }; } diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp index 726f8f4b39827..77a1a5fe7444c 100644 --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -70,6 +70,7 @@ void BPFSubtarget::initializeEnvironment() { HasLoadAcqStoreRel = false; HasGotox = false; AllowsMisalignedMemAccess = false; + AllowBuiltinCalls = false; } void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h index 24eff862224b0..40751fc9b7454 100644 --- a/llvm/lib/Target/BPF/BPFSubtarget.h +++ b/llvm/lib/Target/BPF/BPFSubtarget.h @@ -70,6 +70,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo { 
bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm, HasLoadAcqStoreRel, HasGotox; + bool AllowBuiltinCalls; + std::unique_ptr CallLoweringInfo; std::unique_ptr InstSelector; std::unique_ptr Legalizer; @@ -101,6 +103,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo { bool hasStoreImm() const { return HasStoreImm; } bool hasLoadAcqStoreRel() const { return HasLoadAcqStoreRel; } bool hasGotox() const { return HasGotox; } + bool getAllowBuiltinCalls() const { return AllowBuiltinCalls; } bool isLittleEndian() const { return IsLittleEndian; } diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index 9f1616f6960fe..5f18c37ef1125 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -29,20 +29,6 @@ static const int MaxVecSize = 4; using namespace llvm; -// Recursively creates an array-like version of a given vector type. -static Type *equivalentArrayTypeFromVector(Type *T) { - if (auto *VecTy = dyn_cast(T)) - return ArrayType::get(VecTy->getElementType(), - dyn_cast(VecTy)->getNumElements()); - if (auto *ArrayTy = dyn_cast(T)) { - Type *NewElementType = - equivalentArrayTypeFromVector(ArrayTy->getElementType()); - return ArrayType::get(NewElementType, ArrayTy->getNumElements()); - } - // If it's not a vector or array, return the original type. - return T; -} - class DXILDataScalarizationLegacy : public ModulePass { public: @@ -121,12 +107,25 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) { static bool isVectorOrArrayOfVectors(Type *T) { if (isa(T)) return true; - if (ArrayType *ArrType = dyn_cast(T)) - return isa(ArrType->getElementType()) || - isVectorOrArrayOfVectors(ArrType->getElementType()); + if (ArrayType *ArrayTy = dyn_cast(T)) + return isVectorOrArrayOfVectors(ArrayTy->getElementType()); return false; } +// Recursively creates an array-like version of a given vector type. 
+static Type *equivalentArrayTypeFromVector(Type *T) { + if (auto *VecTy = dyn_cast(T)) + return ArrayType::get(VecTy->getElementType(), + dyn_cast(VecTy)->getNumElements()); + if (auto *ArrayTy = dyn_cast(T)) { + Type *NewElementType = + equivalentArrayTypeFromVector(ArrayTy->getElementType()); + return ArrayType::get(NewElementType, ArrayTy->getNumElements()); + } + // If it's not a vector or array, return the original type. + return T; +} + bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) { Type *AllocatedType = AI.getAllocatedType(); if (!isVectorOrArrayOfVectors(AllocatedType)) @@ -135,7 +134,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) { IRBuilder<> Builder(&AI); Type *NewType = equivalentArrayTypeFromVector(AllocatedType); AllocaInst *ArrAlloca = - Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize"); + Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarized"); ArrAlloca->setAlignment(AI.getAlign()); AI.replaceAllUsesWith(ArrAlloca); AI.eraseFromParent(); @@ -303,78 +302,44 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { GEPOperator *GOp = cast(&GEPI); Value *PtrOperand = GOp->getPointerOperand(); - Type *NewGEPType = GOp->getSourceElementType(); - - // Unwrap GEP ConstantExprs to find the base operand and element type - while (auto *GEPCE = dyn_cast_or_null( - dyn_cast(PtrOperand))) { - GOp = GEPCE; - PtrOperand = GEPCE->getPointerOperand(); - NewGEPType = GEPCE->getSourceElementType(); - } - - Type *const OrigGEPType = NewGEPType; - Value *const OrigOperand = PtrOperand; - - if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) { - NewGEPType = NewGlobal->getValueType(); - PtrOperand = NewGlobal; - } else if (AllocaInst *Alloca = dyn_cast(PtrOperand)) { - Type *AllocatedType = Alloca->getAllocatedType(); - if (isa(AllocatedType) && - AllocatedType != 
GOp->getResultElementType()) - NewGEPType = AllocatedType; - } else - return false; // Only GEPs into an alloca or global variable are considered - - // Defer changing i8 GEP types until dxil-flatten-arrays - if (OrigGEPType->isIntegerTy(8)) - NewGEPType = OrigGEPType; - - // If the original type is a "sub-type" of the new type, then ensure the gep - // correctly zero-indexes the extra dimensions to keep the offset calculation - // correct. - // Eg: - // i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc. - // - // So then: - // gep [4 x i32] %idx - // -> gep [8 x [4 x i32]], i32 0, i32 %idx - // gep i32 %idx - // -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx - uint32_t MissingDims = 0; - Type *SubType = NewGEPType; - - // The new type will be in its array version; so match accordingly. - Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType); - - while (SubType != GEPArrType) { - MissingDims++; - - ArrayType *ArrType = dyn_cast(SubType); - if (!ArrType) { - assert(SubType == GEPArrType && - "GEP uses an DXIL invalid sub-type of alloca/global variable"); - break; - } - - SubType = ArrType->getElementType(); + Type *GEPType = GOp->getSourceElementType(); + + // Replace a GEP ConstantExpr pointer operand with a GEP instruction so that + // it can be visited + if (auto *PtrOpGEPCE = dyn_cast(PtrOperand); + PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) { + GetElementPtrInst *OldGEPI = + cast(PtrOpGEPCE->getAsInstruction()); + OldGEPI->insertBefore(GEPI.getIterator()); + + IRBuilder<> Builder(&GEPI); + SmallVector Indices(GEPI.indices()); + Value *NewGEP = + Builder.CreateGEP(GEPI.getSourceElementType(), OldGEPI, Indices, + GEPI.getName(), GEPI.getNoWrapFlags()); + assert(isa(NewGEP) && + "Expected newly-created GEP to be an instruction"); + GetElementPtrInst *NewGEPI = cast(NewGEP); + + GEPI.replaceAllUsesWith(NewGEPI); + GEPI.eraseFromParent(); + visitGetElementPtrInst(*OldGEPI); + 
visitGetElementPtrInst(*NewGEPI); + return true; } - bool NeedsTransform = OrigOperand != PtrOperand || - OrigGEPType != NewGEPType || MissingDims != 0; + Type *NewGEPType = equivalentArrayTypeFromVector(GEPType); + Value *NewPtrOperand = PtrOperand; + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) + NewPtrOperand = NewGlobal; + bool NeedsTransform = NewPtrOperand != PtrOperand || NewGEPType != GEPType; if (!NeedsTransform) return false; IRBuilder<> Builder(&GEPI); - SmallVector Indices; - - for (uint32_t I = 0; I < MissingDims; I++) - Indices.push_back(Builder.getInt32(0)); - llvm::append_range(Indices, GOp->indices()); - - Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices, + SmallVector Indices(GOp->idx_begin(), GOp->idx_end()); + Value *NewGEP = Builder.CreateGEP(NewGEPType, NewPtrOperand, Indices, GOp->getName(), GOp->getNoWrapFlags()); GOp->replaceAllUsesWith(NewGEP); diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 632b44c3cf635..a59586bdb4476 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -554,6 +554,7 @@ static void cacheDIVar(FrameDataInfo &FrameData, DIVarCache.insert({V, (*I)->getVariable()}); }; CacheIt(findDVRDeclares(V)); + CacheIt(findDVRDeclareValues(V)); } } @@ -1142,6 +1143,47 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) { for_each(DVRs, SalvageOne); } + TinyPtrVector DVRDeclareValues = + findDVRDeclareValues(Def); + // Try best to find dbg.declare_value. If the spill is a temp, there may + // not be a direct dbg.declare_value. Walk up the load chain to find one + // from an alias. + if (F->getSubprogram()) { + auto *CurDef = Def; + while (DVRDeclareValues.empty() && isa(CurDef)) { + auto *LdInst = cast(CurDef); + // Only consider ptr to ptr same type load. 
+ if (LdInst->getPointerOperandType() != LdInst->getType()) + break; + CurDef = LdInst->getPointerOperand(); + if (!isa(CurDef)) + break; + DVRDeclareValues = findDVRDeclareValues(CurDef); + } + } + + auto SalvageOneCoro = [&](auto *DDI) { + // This dbg.declare_value is preserved for all coro-split function + // fragments. It will be unreachable in the main function, and + // processed by coro::salvageDebugInfo() by the Cloner. However, convert + // it to a dbg.declare to make sure future passes don't have to deal + // with a dbg.declare_value. + auto *VAM = ValueAsMetadata::get(CurrentReload); + Type *Ty = VAM->getValue()->getType(); + // If the metadata type is not a pointer, emit a dbg.value instead. + DbgVariableRecord *NewDVR = new DbgVariableRecord( + ValueAsMetadata::get(CurrentReload), DDI->getVariable(), + DDI->getExpression(), DDI->getDebugLoc(), + Ty->isPointerTy() ? DbgVariableRecord::LocationType::Declare + : DbgVariableRecord::LocationType::Value); + Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore( + NewDVR, Builder.GetInsertPoint()); + // This dbg.declare_value is for the main function entry point. It + // will be deleted in all coro-split functions. + coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/); + }; + for_each(DVRDeclareValues, SalvageOneCoro); + // If we have a single edge PHINode, remove it and replace it with a // reload from the coroutine frame. (We already took care of multi edge // PHINodes by normalizing them in the rewritePHIs function). @@ -1925,7 +1967,7 @@ void coro::salvageDebugInfo( Function *F = DVR.getFunction(); // Follow the pointer arithmetic all the way to the incoming // function argument and convert into a DIExpression. 
- bool SkipOutermostLoad = DVR.isDbgDeclare(); + bool SkipOutermostLoad = DVR.isDbgDeclare() || DVR.isDbgDeclareValue(); Value *OriginalStorage = DVR.getVariableLocationOp(0); auto SalvagedInfo = @@ -1939,10 +1981,11 @@ void coro::salvageDebugInfo( DVR.replaceVariableLocationOp(OriginalStorage, Storage); DVR.setExpression(Expr); - // We only hoist dbg.declare today since it doesn't make sense to hoist - // dbg.value since it does not have the same function wide guarantees that - // dbg.declare does. - if (DVR.getType() == DbgVariableRecord::LocationType::Declare) { + // We only hoist dbg.declare and dbg.declare_value today since it doesn't make + // sense to hoist dbg.value since it does not have the same function wide + // guarantees that dbg.declare does. + if (DVR.getType() == DbgVariableRecord::LocationType::Declare || + DVR.getType() == DbgVariableRecord::LocationType::DeclareValue) { std::optional InsertPt; if (auto *I = dyn_cast(Storage)) { InsertPt = I->getInsertionPointAfterDef(); @@ -1957,6 +2000,19 @@ void coro::salvageDebugInfo( InsertPt = F->getEntryBlock().begin(); if (InsertPt) { DVR.removeFromParent(); + // If there is a dbg.declare_value being reinserted, insert it as a + // dbg.declare instead, so that subsequent passes don't have to deal with + // a dbg.declare_value. 
+ if (DVR.getType() == DbgVariableRecord::LocationType::DeclareValue) { + auto *MD = DVR.getRawLocation(); + if (auto *VAM = dyn_cast(MD)) { + Type *Ty = VAM->getValue()->getType(); + if (Ty->isPointerTy()) + DVR.Type = DbgVariableRecord::LocationType::Declare; + else + DVR.Type = DbgVariableRecord::LocationType::Value; + } + } (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8a435accfedfe..0c7d9c0193a03 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1854,12 +1854,6 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { return getOperand(I + 1)->isDefinedOutsideLoopRegions(); } - bool areAllOperandsInvariant() const { - return all_of(operands(), [](VPValue *Op) { - return Op->isDefinedOutsideLoopRegions(); - }); - } - public: VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef Operands, const VPIRFlags &Flags = {}, @@ -1898,14 +1892,7 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { } /// Returns true if the recipe only uses the first lane of operand \p Op. 
- bool usesFirstLaneOnly(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - if (Op == getOperand(0)) - return isPointerLoopInvariant(); - else - return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions(); - } + bool usesFirstLaneOnly(const VPValue *Op) const override; protected: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 5ea9dd349e06f..54fdec3bcf4a1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2536,6 +2536,11 @@ void VPScalarIVStepsRecipe::printRecipe(raw_ostream &O, const Twine &Indent, } #endif +bool VPWidenGEPRecipe::usesFirstLaneOnly(const VPValue *Op) const { + assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); + return vputils::isSingleScalar(Op); +} + void VPWidenGEPRecipe::execute(VPTransformState &State) { assert(State.VF.isVector() && "not widening"); // Construct a vector GEP by widening the operands of the scalar GEP as @@ -2544,51 +2549,32 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // is vector-typed. Thus, to keep the representation compact, we only use // vector-typed operands for loop-varying values. - if (areAllOperandsInvariant()) { - // If we are vectorizing, but the GEP has only loop-invariant operands, - // the GEP we build (by only using vector-typed operands for - // loop-varying values) would be a scalar pointer. Thus, to ensure we - // produce a vector of pointers, we need to either arbitrarily pick an - // operand to broadcast, or broadcast a clone of the original GEP. - // Here, we broadcast a clone of the original. - // - // TODO: If at some point we decide to scalarize instructions having - // loop-invariant operands, this special case will no longer be - // required. 
We would add the scalarization decision to - // collectLoopScalars() and teach getVectorValue() to broadcast - // the lane-zero scalar value. - SmallVector Ops; - for (unsigned I = 0, E = getNumOperands(); I != E; I++) - Ops.push_back(State.get(getOperand(I), VPLane(0))); - - auto *NewGEP = - State.Builder.CreateGEP(getSourceElementType(), Ops[0], drop_begin(Ops), - "", getGEPNoWrapFlags()); - Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP); - State.set(this, Splat); - } else { - // If the GEP has at least one loop-varying operand, we are sure to - // produce a vector of pointers unless VF is scalar. - // The pointer operand of the new GEP. If it's loop-invariant, we - // won't broadcast it. - auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant()); - - // Collect all the indices for the new GEP. If any index is - // loop-invariant, we won't broadcast it. - SmallVector Indices; - for (unsigned I = 1, E = getNumOperands(); I < E; I++) { - VPValue *Operand = getOperand(I); - Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1))); - } - - // Create the new GEP. Note that this GEP may be a scalar if VF == 1, - // but it should be a vector, otherwise. - auto *NewGEP = State.Builder.CreateGEP(getSourceElementType(), Ptr, Indices, - "", getGEPNoWrapFlags()); - assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && - "NewGEP is not a pointer vector"); - State.set(this, NewGEP); - } + assert( + any_of(operands(), + [](VPValue *Op) { return !Op->isDefinedOutsideLoopRegions(); }) && + "Expected at least one loop-variant operand"); + + // If the GEP has at least one loop-varying operand, we are sure to + // produce a vector of pointers unless VF is scalar. + // The pointer operand of the new GEP. If it's loop-invariant, we + // won't broadcast it. + auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant()); + + // Collect all the indices for the new GEP. If any index is + // loop-invariant, we won't broadcast it. 
+ SmallVector Indices; + for (unsigned I = 1, E = getNumOperands(); I < E; I++) { + VPValue *Operand = getOperand(I); + Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1))); + } + + // Create the new GEP. Note that this GEP may be a scalar if VF == 1, + // but it should be a vector, otherwise. + auto *NewGEP = State.Builder.CreateGEP(getSourceElementType(), Ptr, Indices, + "", getGEPNoWrapFlags()); + assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && + "NewGEP is not a pointer vector"); + State.set(this, NewGEP); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index e7a8773be067b..89b490e960f33 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1451,7 +1451,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { - if (!isa(&R)) + if (!isa(&R)) continue; auto *RepR = dyn_cast(&R); if (RepR && (RepR->isSingleScalar() || RepR->isPredicated())) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll index 57481724936a3..cab2741be9929 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -12,7 +12,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load 
(s32) from %ir.p) ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -46,13 +46,13 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w1, $x0, $x2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew) + ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $x0, $x9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -91,7 +91,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -243,7 +243,7 @@ define i32 @fetch_and_nand(ptr %p) { ; CHECK-NEXT: successors: 
%bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w8, 2, pcsections !0 ; CHECK-NEXT: $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p) @@ -295,7 +295,7 @@ define i32 @fetch_and_or(ptr %p) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 @@ -726,7 +726,7 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW 
killed renamable $w10, %bb.1, pcsections !0 @@ -750,7 +750,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -773,7 +773,7 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -797,7 +797,7 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB killed 
renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -821,7 +821,7 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -845,7 +845,7 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -869,7 +869,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile 
load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0 @@ -895,7 +895,7 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0 @@ -923,10 +923,10 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, 
renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -951,10 +951,10 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -977,7 +977,7 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable 
$w10, %bb.1, pcsections !0 @@ -1001,7 +1001,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1024,7 +1024,7 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -1048,7 +1048,7 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH killed 
renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -1072,7 +1072,7 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -1096,7 +1096,7 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -1120,7 +1120,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, 
pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0 @@ -1146,7 +1146,7 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0 @@ -1174,10 +1174,10 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 ; CHECK-NEXT: early-clobber 
renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1202,10 +1202,10 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1230,7 +1230,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) { ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: 
Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 @@ -1273,7 +1273,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) { ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll index 50fac819d4afe..e37d8f7da4bfc 100644 --- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll +++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s +; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s +; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true < %s | FileCheck %s ; Check there's no assert in spilling from implicit-def operands on an ; IMPLICIT_DEF. 
@@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a ; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Reload ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: mov x1, xzr -; CHECK-NEXT: ; kill: def $w8 killed $w8 killed $x8 def $x8 ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl _fprintf ; CHECK-NEXT: brk #0x1 diff --git a/llvm/test/CodeGen/AArch64/pr151592.mir b/llvm/test/CodeGen/AArch64/pr151592.mir new file mode 100644 index 0000000000000..dbcc1f8c08e9a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr151592.mir @@ -0,0 +1,168 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=aarch64 -run-pass=register-coalescer -o - %s | FileCheck %s +--- +name: reproducer +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: reproducer + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: undef [[MOVIv2d_ns:%[0-9]+]].qsub1:zpr2 = MOVIv2d_ns 0, implicit-def [[MOVIv2d_ns]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64sp = COPY $xzr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.3 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].dsub:zpr2 = LDRDui [[COPY2]], 0, implicit-def [[MOVIv2d_ns]].zsub + ; CHECK-NEXT: ST2Twov2d [[MOVIv2d_ns]].zsub_qsub1, [[COPY]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 1 + ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 0 + ; CHECK-NEXT: B %bb.1 + bb.0: + liveins: $w0, $x1 + %0:gpr64common = 
COPY $x1 + %1:gpr32 = COPY $w0 + %2:gpr32 = COPY %1:gpr32 + undef %8.qsub1:qq = MOVIv2d_ns 0 + %4:zpr = SUBREG_TO_REG 0, %8.qsub1:qq, %subreg.zsub + %5:gpr64sp = COPY $xzr + + bb.1: + TBNZW %2:gpr32, 0, %bb.3 + B %bb.2 + + bb.2: + %8.dsub:qq = LDRDui %5:gpr64sp, 0, implicit-def %8.qsub0:qq + ST2Twov2d %8:qq, %0:gpr64common + + bb.3: + STR_ZXI %4:zpr, %0:gpr64common, 1 + STR_ZXI %4:zpr, %0:gpr64common, 0 + B %bb.1 +... +--- +name: reproducer2 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: reproducer2 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: undef [[MOVIv2d_ns:%[0-9]+]].zsub:zpr2 = MOVIv2d_ns 0 + ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].qsub1:zpr2 = MOVIv2d_ns 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64sp = COPY $xzr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.3 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].dsub:zpr2 = LDRDui [[COPY2]], 0, implicit-def [[MOVIv2d_ns]].zsub + ; CHECK-NEXT: ST2Twov2d [[MOVIv2d_ns]].zsub_qsub1, [[COPY]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 1 + ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 0 + ; CHECK-NEXT: B %bb.1 + bb.0: + liveins: $w0, $x1 + %0:gpr64common = COPY $x1 + %1:gpr32 = COPY $w0 + %2:gpr32 = COPY %1:gpr32 + undef %8.qsub0:qq = MOVIv2d_ns 0 + %8.qsub1:qq = MOVIv2d_ns 0 + %4:zpr = SUBREG_TO_REG 0, %8.qsub1:qq, %subreg.zsub + %5:gpr64sp = COPY $xzr + + bb.1: + TBNZW %2:gpr32, 0, %bb.3 + B %bb.2 + + bb.2: + %8.dsub:qq = LDRDui 
%5:gpr64sp, 0, implicit-def %8.qsub0:qq + ST2Twov2d %8:qq, %0:gpr64common + + bb.3: + STR_ZXI %4:zpr, %0:gpr64common, 1 + STR_ZXI %4:zpr, %0:gpr64common, 0 + B %bb.1 +... +--- +name: reproducer3 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: reproducer3 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: undef [[MOVIv2d_ns:%[0-9]+]].qsub1:zpr2 = MOVIv2d_ns 0 + ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].zsub:zpr2 = MOVIv2d_ns 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64sp = COPY $xzr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.3 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].dsub1:zpr2 = LDRDui [[COPY2]], 0, implicit-def [[MOVIv2d_ns]].qsub1 + ; CHECK-NEXT: ST2Twov2d [[MOVIv2d_ns]].zsub_qsub1, [[COPY]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub0, [[COPY]], 1 + ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub0, [[COPY]], 0 + ; CHECK-NEXT: B %bb.1 + bb.0: + liveins: $w0, $x1 + %0:gpr64common = COPY $x1 + %1:gpr32 = COPY $w0 + %2:gpr32 = COPY %1:gpr32 + undef %8.qsub1:qq = MOVIv2d_ns 0 + %8.qsub0:qq = MOVIv2d_ns 0 + %4:zpr = SUBREG_TO_REG 0, %8.qsub0:qq, %subreg.zsub + %5:gpr64sp = COPY $xzr + + bb.1: + TBNZW %2:gpr32, 0, %bb.3 + B %bb.2 + + bb.2: + %8.dsub1:qq = LDRDui %5:gpr64sp, 0, implicit-def %8.qsub1:qq + ST2Twov2d %8:qq, %0:gpr64common + + bb.3: + STR_ZXI %4:zpr, %0:gpr64common, 1 + STR_ZXI %4:zpr, %0:gpr64common, 0 + B %bb.1 +... 
diff --git a/llvm/test/CodeGen/AArch64/pr151888.mir b/llvm/test/CodeGen/AArch64/pr151888.mir new file mode 100644 index 0000000000000..5b66f136cbc4f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr151888.mir @@ -0,0 +1,17 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=aarch64 -run-pass=register-coalescer -o - %s | FileCheck %s +--- +name: reproducer +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: reproducer + ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri $xzr, 0, 31 + ; CHECK-NEXT: $x0 = COPY [[UBFMXri]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %1:gpr32 = COPY killed $wzr + %2:gpr64 = SUBREG_TO_REG 0, %1:gpr32, %subreg.sub_32 + %3:gpr64 = UBFMXri %2:gpr64, 0, 31 + $x0 = COPY %3:gpr64 + RET_ReallyLR implicit $x0 +... diff --git a/llvm/test/CodeGen/AArch64/pr164181-reduced.ll b/llvm/test/CodeGen/AArch64/pr164181-reduced.ll new file mode 100644 index 0000000000000..192893e6a08cc --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr164181-reduced.ll @@ -0,0 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; +; This is a reduced version of pr164181.ll, which failed with subreg liveness enabled +; after adding the implicit-def for a SUBREG_TO_REG to mark the top 32-bits of a register +; being written by a MOVi32imm instruction. This previously failed the machine verifier +; because the liverange for the top 32-bits weren't updated when rematerializing the +; MOVi32imm. 
+; +; RUN: llc -verify-machineinstrs -enable-subreg-liveness=true < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -enable-subreg-liveness=false < %s | FileCheck %s +target triple = "aarch64-unknown-linux-gnu" + +define void @f(i1 %var_0, i64 %var_2, i64 %var_11, ptr %arr_3, ptr %arr_4, ptr %arr_7, ptr %arr_13, ptr %invariant.gep875.us, ptr %arrayidx384.us, i16 %0, i1 %tobool435.not.us, ptr %gep876.us, i16 %cond464.in.us, ptr %1, i16 %conv227.us, i1 %cmp378.us) #0 { +; CHECK-LABEL: f: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-96]! // 8-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w23, -40 +; CHECK-NEXT: .cfi_offset w24, -48 +; CHECK-NEXT: .cfi_offset w25, -56 +; CHECK-NEXT: .cfi_offset w26, -64 +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 +; CHECK-NEXT: .cfi_offset w30, -96 +; CHECK-NEXT: ldrb w9, [sp, #152] +; CHECK-NEXT: ldrh w10, [sp, #144] +; CHECK-NEXT: mov x19, #-18403 // =0xffffffffffffb81d +; CHECK-NEXT: ldr x11, [sp, #136] +; CHECK-NEXT: ldrh w12, [sp, #128] +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: ldr x13, [sp, #120] +; CHECK-NEXT: ldrb w14, [sp, #112] +; CHECK-NEXT: mov w15, wzr +; CHECK-NEXT: ldrh w16, [sp, #104] +; CHECK-NEXT: ldr x17, [sp, #96] +; CHECK-NEXT: mov w18, #149 // =0x95 +; CHECK-NEXT: movk x19, #58909, lsl #16 +; CHECK-NEXT: mov w20, #-18978 // =0xffffb5de +; CHECK-NEXT: mov w21, #1 // =0x1 +; CHECK-NEXT: mov w22, #-7680 // =0xffffe200 +; CHECK-NEXT: mov w23, #36006 // =0x8ca6 +; CHECK-NEXT: mov x25, 
xzr +; CHECK-NEXT: mov x24, xzr +; CHECK-NEXT: .LBB0_1: // %for.body99.us +; CHECK-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NEXT: // Child Loop BB0_4 Depth 2 +; CHECK-NEXT: // Child Loop BB0_10 Depth 2 +; CHECK-NEXT: mov w27, w15 +; CHECK-NEXT: mov x26, x25 +; CHECK-NEXT: mov x28, x24 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .LBB0_2: // in Loop: Header=BB0_4 Depth=2 +; CHECK-NEXT: mov w25, #1 // =0x1 +; CHECK-NEXT: .LBB0_3: // %for.inc371.us +; CHECK-NEXT: // in Loop: Header=BB0_4 Depth=2 +; CHECK-NEXT: mul w27, w15, w20 +; CHECK-NEXT: mov x28, xzr +; CHECK-NEXT: mov x26, x2 +; CHECK-NEXT: tbz w0, #0, .LBB0_9 +; CHECK-NEXT: .LBB0_4: // %for.body194.us +; CHECK-NEXT: // Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NEXT: orr x15, x28, x19 +; CHECK-NEXT: mov x24, x28 +; CHECK-NEXT: strh w10, [x13] +; CHECK-NEXT: strb w18, [x5] +; CHECK-NEXT: str x15, [x4] +; CHECK-NEXT: mov w15, w27 +; CHECK-NEXT: str x8, [x11] +; CHECK-NEXT: str x1, [x3] +; CHECK-NEXT: tbz w14, #0, .LBB0_2 +; CHECK-NEXT: // %bb.5: // %if.then327.us +; CHECK-NEXT: // in Loop: Header=BB0_4 Depth=2 +; CHECK-NEXT: cbnz w21, .LBB0_7 +; CHECK-NEXT: // %bb.6: // %cond.true331.us +; CHECK-NEXT: // in Loop: Header=BB0_4 Depth=2 +; CHECK-NEXT: ldrsb w27, [x8] +; CHECK-NEXT: b .LBB0_8 +; CHECK-NEXT: .LBB0_7: // in Loop: Header=BB0_4 Depth=2 +; CHECK-NEXT: mov w27, wzr +; CHECK-NEXT: .LBB0_8: // %cond.end345.us +; CHECK-NEXT: // in Loop: Header=BB0_4 Depth=2 +; CHECK-NEXT: mov x25, xzr +; CHECK-NEXT: strh w27, [x3] +; CHECK-NEXT: str x26, [x6] +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_9: // %for.cond376.preheader.us +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: mov x26, xzr +; CHECK-NEXT: mov w27, wzr +; CHECK-NEXT: .LBB0_10: // %for.body380.us +; CHECK-NEXT: // Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ands w28, w0, #0x1 +; CHECK-NEXT: orr x26, x26, #0x1 +; CHECK-NEXT: strh w16, [x7] +; 
CHECK-NEXT: csel w30, w23, w22, ne +; CHECK-NEXT: tst w12, #0xffff +; CHECK-NEXT: csel w21, wzr, w27, eq +; CHECK-NEXT: cmp w28, #0 +; CHECK-NEXT: str w30, [x17] +; CHECK-NEXT: csel w27, w27, w21, ne +; CHECK-NEXT: tbnz w9, #0, .LBB0_10 +; CHECK-NEXT: // %bb.11: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: mov w21, #1 // =0x1 +; CHECK-NEXT: b .LBB0_1 +entry: + br label %for.body99.us + +for.body99.us: ; preds = %for.inc505.us, %entry + %mul287985.us = phi i16 [ 0, %entry ], [ %mul287986.us, %for.inc505.us ] + %mul354905.us = phi i64 [ 0, %entry ], [ %mul354907.us, %for.inc505.us ] + %sub283896.us = phi i64 [ 0, %entry ], [ %sub283897.us, %for.inc505.us ] + %conv96880.us4 = phi i64 [ 0, %entry ], [ 0, %for.inc505.us ] + br label %for.body194.us + +for.body380.us: ; preds = %for.cond376.preheader.us, %for.inc505.us + %indvars.iv10181 = phi i64 [ 0, %for.cond376.preheader.us ], [ %indvars.iv.next1019, %for.inc505.us ] + %2 = phi i8 [ 0, %for.cond376.preheader.us ], [ %3, %for.inc505.us ] + store i16 %0, ptr %invariant.gep875.us, align 2 + %arrayidx416.us = getelementptr i16, ptr %arr_13, i64 %indvars.iv10181 + %conv419.us = select i1 %var_0, i32 36006, i32 -7680 + store i32 %conv419.us, ptr %arrayidx384.us, align 4 + br i1 %var_0, label %for.inc505.us, label %if.then436.us + +if.then436.us: ; preds = %for.body380.us + %cond464.in.us6 = load i16, ptr null, align 2 + %tobool465.not.us = icmp eq i16 %cond464.in.us, 0 + %spec.select = select i1 %tobool465.not.us, i8 0, i8 %2 + br label %for.inc505.us + +for.inc505.us: ; preds = %if.then436.us, %for.body380.us + %3 = phi i8 [ %2, %for.body380.us ], [ %spec.select, %if.then436.us ] + %indvars.iv.next1019 = or i64 %indvars.iv10181, 1 + br i1 %cmp378.us, label %for.body380.us, label %for.body99.us + +for.body194.us: ; preds = %for.inc371.us, %for.body99.us + %mul287986.us = phi i16 [ %mul287985.us, %for.body99.us ], [ %mul287.us, %for.inc371.us ] + %mul354906.us = phi i64 [ %mul354905.us, %for.body99.us ], [ %var_11, 
%for.inc371.us ] + %sub283897.us = phi i64 [ %sub283896.us, %for.body99.us ], [ 0, %for.inc371.us ] + store i16 %conv227.us, ptr %gep876.us, align 2 + store i8 -107, ptr %arr_7, align 1 + %sub283.us = or i64 %sub283897.us, -434259939 + store i64 %sub283.us, ptr %arr_4, align 8 + %mul287.us = mul i16 %mul287986.us, -18978 + store i64 0, ptr %1, align 8 + store i64 %var_2, ptr %arr_3, align 8 + br i1 %tobool435.not.us, label %if.then327.us, label %for.inc371.us + +if.then327.us: ; preds = %for.body194.us + %tobool330.not.us = icmp eq i32 0, 0 + br i1 %tobool330.not.us, label %cond.end345.us, label %cond.true331.us + +cond.true331.us: ; preds = %if.then327.us + %4 = load i8, ptr null, align 1 + %5 = sext i8 %4 to i16 + br label %cond.end345.us + +cond.end345.us: ; preds = %cond.true331.us, %if.then327.us + %cond346.us = phi i16 [ %5, %cond.true331.us ], [ 0, %if.then327.us ] + store i16 %cond346.us, ptr %arr_3, align 2 + store i64 %mul354906.us, ptr %arr_13, align 8 + br label %for.inc371.us + +for.inc371.us: ; preds = %cond.end345.us, %for.body194.us + %mul354907.us = phi i64 [ 1, %for.body194.us ], [ 0, %cond.end345.us ] + br i1 %var_0, label %for.body194.us, label %for.cond376.preheader.us + +for.cond376.preheader.us: ; preds = %for.inc371.us + %arrayidx384.us9 = getelementptr i16, ptr null, i64 %conv96880.us4 + br label %for.body380.us +} + +attributes #0 = { "frame-pointer"="non-leaf" } diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll index 2a77d4dd33fe5..4206c0bc26991 100644 --- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll @@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: sub sp, sp, #208 ; CHECK-NEXT: mov w8, #10 ; =0xa ; CHECK-NEXT: mov w9, #9 ; =0x9 -; CHECK-NEXT: mov w10, #8 ; =0x8 +; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: stp x9, x8, [sp, #24] -; CHECK-NEXT: mov w8, 
#7 ; =0x7 +; CHECK-NEXT: mov w8, #8 ; =0x8 ; CHECK-NEXT: mov w9, #6 ; =0x6 -; CHECK-NEXT: mov w0, #1 ; =0x1 +; CHECK-NEXT: str x8, [sp, #16] +; CHECK-NEXT: mov w8, #7 ; =0x7 ; CHECK-NEXT: mov w1, #2 ; =0x2 ; CHECK-NEXT: mov w2, #3 ; =0x3 ; CHECK-NEXT: mov w3, #4 ; =0x4 @@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill -; CHECK-NEXT: stp x8, x10, [sp, #8] -; CHECK-NEXT: str x9, [sp] +; CHECK-NEXT: stp x9, x8, [sp] ; CHECK-NEXT: bl _callee ; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir new file mode 100644 index 0000000000000..a58a23068896b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s +--- +name: test +tracksRegLiveness: true +body: | + bb.0: + liveins: $x1 + ; CHECK-LABEL: name: test + ; CHECK: liveins: $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x0 = COPY $x1 + ; CHECK-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1 + ; CHECK-NEXT: RET_ReallyLR implicit $x1, implicit $x0 + %190:gpr64 = COPY killed $x1 + %191:gpr32 = COPY %190.sub_32:gpr64 + %192:gpr32 = ORRWrr $wzr, killed %191:gpr32 + %193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32 + $x0 
= COPY killed %190:gpr64 + $x1 = COPY killed %193:gpr64all + RET_ReallyLR implicit $x1, implicit $x0 +... + +# In this test, we should avoid adding an implicit-def to ORRXri, because +# the register class will already be gpr64sp. +--- +name: test2 +tracksRegLiveness: true +frameInfo: + adjustsStack: true +body: | + bb.0: + ; CHECK-LABEL: name: test2 + ; CHECK: renamable $x8 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $x9 = ORRXri renamable $x8, 8128 + ; CHECK-NEXT: $x2 = ORRXri renamable $x8, 8128 + ; CHECK-NEXT: RET_ReallyLR implicit killed renamable $x8, implicit killed renamable $x9 + %0:gpr64 = IMPLICIT_DEF + %1:gpr64sp = ORRXri %0, 8128 + %3:gpr64 = SUBREG_TO_REG 0, %1.sub_32, %subreg.sub_32 + %2:gpr64all = COPY killed %1 + $x2 = COPY killed %2 + RET_ReallyLR implicit %0, implicit %3 +... diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir index 08fc47d9480ce..eb6242ce9940d 100644 --- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir +++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir @@ -7,9 +7,18 @@ # CHECK-DBG: ********** JOINING INTERVALS *********** # CHECK-DBG: ********** INTERVALS ********** # CHECK-DBG: %0 [16r,32r:0) 0@16r weight:0.000000e+00 -# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00 -# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [112e,112d:0) 0@112e L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00 +# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000080 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00 +# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00 # CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0@112r 1@32r weight:0.000000e+00 +# CHECK-DBG: 
********** MACHINEINSTRS ********** +# CHECK-DBG: 0B bb.0.entry: +# CHECK-DBG: 16B %0:gpr64sp = ADDXri %stack.0, 0, 0 +# CHECK-DBG: 32B %5:gpr64common = nuw ADDXri %0:gpr64sp, 64, 0 +# CHECK-DBG: 48B undef %3.sub_32:gpr64 = MOVi32imm 64, implicit-def %3:gpr64 +# CHECK-DBG: 80B undef %4.sub_32:gpr64 = MOVi32imm 64, implicit-def %4:gpr64 +# CHECK-DBG: 112B dead %5:gpr64common, dead early-clobber %4:gpr64 = MOPSMemorySetPseudo %5:gpr64common(tied-def 0), %4:gpr64(tied-def 1), %3:gpr64, implicit-def dead $nzcv +# CHECK-DBG: 128B RET_ReallyLR + --- name: test tracksRegLiveness: true @@ -43,9 +52,44 @@ body: | # CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0@48B-phi 1@320r 2@32r # CHECK-DBG-SAME: weight:0.000000e+00 # CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi -# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0@288r 1@x 2@x 3@304B-phi +# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@x 3@304B-phi # CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi # CHECK-DBG-SAME: weight:0.000000e+00 +# CHECK-DBG: ********** MACHINEINSTRS ********** +# CHECK-DBG: 0B bb.0: +# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%) +# CHECK-DBG: 32B %1:gpr64 = IMPLICIT_DEF +# CHECK-DBG: 48B bb.1: +# CHECK-DBG: ; predecessors: %bb.0, %bb.7 +# CHECK-DBG: successors: %bb.2(0x80000000); %bb.2(100.00%) +# CHECK-DBG: 64B bb.2: +# CHECK-DBG: ; predecessors: %bb.1 +# CHECK-DBG: successors: %bb.3(0x80000000); %bb.3(100.00%) +# CHECK-DBG: 80B undef %3.sub_32:gpr64 = MOVi32imm 1 +# CHECK-DBG: 96B bb.3: +# CHECK-DBG: ; predecessors: %bb.2 +# CHECK-DBG: successors: %bb.7(0x40000000), %bb.4(0x40000000); %bb.7(50.00%), %bb.4(50.00%) +# CHECK-DBG: 112B $nzcv = IMPLICIT_DEF +# CHECK-DBG: 144B Bcc 1, %bb.7, implicit killed $nzcv +# CHECK-DBG: 160B bb.4: +# CHECK-DBG: ; predecessors: %bb.3 +# CHECK-DBG: successors: 
%bb.6(0x40000000), %bb.5(0x40000000); %bb.6(50.00%), %bb.5(50.00%) +# CHECK-DBG: 176B $nzcv = IMPLICIT_DEF +# CHECK-DBG: 192B Bcc 1, %bb.6, implicit killed $nzcv +# CHECK-DBG: 208B bb.5: +# CHECK-DBG: ; predecessors: %bb.4 +# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%) +# CHECK-DBG: 240B undef %3.sub_32:gpr64 = MOVi32imm 1, implicit-def %3:gpr64 +# CHECK-DBG: 256B B %bb.7 +# CHECK-DBG: 272B bb.6: +# CHECK-DBG: ; predecessors: %bb.4 +# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%) +# CHECK-DBG: 288B %3:gpr64 = COPY $xzr +# CHECK-DBG: 304B bb.7: +# CHECK-DBG: ; predecessors: %bb.3, %bb.5, %bb.6 +# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%) +# CHECK-DBG: 320B %1:gpr64 = ADDXrs %1:gpr64, %3:gpr64, 1 +# CHECK-DBG: 352B B %bb.1 --- name: reproducer tracksRegLiveness: true @@ -92,6 +136,42 @@ body: | # CHECK-DBG-SAME: L0000000000000080 [224r,256B:1)[272r,288B:0)[288B,304r:3) 0@272r 1@224r 2@x 3@288B-phi # CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[224r,256B:1)[272r,288B:0)[288B,304r:3) 0@272r 1@224r 2@80r 3@288B-phi # CHECK-DBG-SAME: weight:0.000000e+00 +# CHECK-DBG: ********** MACHINEINSTRS ********** +# CHECK-DBG: 0B bb.0: +# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%) +# CHECK-DBG: 32B %1:gpr64 = IMPLICIT_DEF +# CHECK-DBG: 48B bb.1: +# CHECK-DBG: ; predecessors: %bb.0, %bb.7 +# CHECK-DBG: successors: %bb.2(0x80000000); %bb.2(100.00%) +# CHECK-DBG: 64B bb.2: +# CHECK-DBG: ; predecessors: %bb.1 +# CHECK-DBG: successors: %bb.3(0x80000000); %bb.3(100.00%) +# CHECK-DBG: 80B undef %3.sub_32:gpr64 = MOVi32imm 1 +# CHECK-DBG: 96B bb.3: +# CHECK-DBG: ; predecessors: %bb.2 +# CHECK-DBG: successors: %bb.7(0x40000000), %bb.4(0x40000000); %bb.7(50.00%), %bb.4(50.00%) +# CHECK-DBG: 112B $nzcv = IMPLICIT_DEF +# CHECK-DBG: 144B Bcc 1, %bb.7, implicit killed $nzcv +# CHECK-DBG: 160B bb.4: +# CHECK-DBG: ; predecessors: %bb.3 +# CHECK-DBG: successors: %bb.6(0x40000000), %bb.5(0x40000000); %bb.6(50.00%), %bb.5(50.00%) +# CHECK-DBG: 
176B $nzcv = IMPLICIT_DEF +# CHECK-DBG: 192B Bcc 1, %bb.6, implicit killed $nzcv +# CHECK-DBG: 208B bb.5: +# CHECK-DBG: ; predecessors: %bb.4 +# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%) +# CHECK-DBG: 224B %3:gpr64 = IMPLICIT_DEF +# CHECK-DBG: 240B B %bb.7 +# CHECK-DBG: 256B bb.6: +# CHECK-DBG: ; predecessors: %bb.4 +# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%) +# CHECK-DBG: 272B %3:gpr64 = COPY $xzr +# CHECK-DBG: 288B bb.7: +# CHECK-DBG: ; predecessors: %bb.3, %bb.5, %bb.6 +# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%) +# CHECK-DBG: 304B %1:gpr64 = ADDXrs %1:gpr64, %3:gpr64, 1 +# CHECK-DBG: 336B B %bb.1 + --- name: reproducer2 tracksRegLiveness: true @@ -127,3 +207,78 @@ body: | B %bb.1 ... +# CHECK-DBG: ********** REGISTER COALESCER ********** +# CHECK-DBG: ********** Function: reproducer3 +# CHECK-DBG: ********** JOINING INTERVALS *********** +# CHECK-DBG: ********** INTERVALS ********** +# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0@0B-phi 1@320r +# CHECK-DBG: W1 [0B,16r:0) 0@0B-phi +# CHECK-DBG: %0 [16r,64r:0) 0@16r weight:0.000000e+00 +# CHECK-DBG: %1 [32r,128r:0) 0@32r weight:0.000000e+00 +# CHECK-DBG: %2 [48r,64r:0) 0@48r weight:0.000000e+00 +# CHECK-DBG: %3 [64r,80r:0) 0@64r weight:0.000000e+00 +# CHECK-DBG: %4 [80r,176r:0) 0@80r weight:0.000000e+00 +# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r +# CHECK-DBG-SAME: L0000000000000080 [128r,256r:0)[304B,320r:0) 0@128r +# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r +# CHECK-DBG-SAME: weight:0.000000e+00 +# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0@176r 1@96r weight:0.000000e+00 +# CHECK-DBG: %9 [256r,272r:0) 0@256r weight:0.000000e+00 +# CHECK-DBG: ********** MACHINEINSTRS ********** +# CHECK-DBG: 0B bb.0: +# CHECK-DBG: successors: %bb.2(0x40000000), %bb.1(0x40000000); %bb.2(50.00%), %bb.1(50.00%) +# CHECK-DBG: liveins: $w0, $w1 +# CHECK-DBG: 16B %0:gpr32 = COPY $w1 +# CHECK-DBG: 32B %1:gpr32 = COPY $w0 +# 
CHECK-DBG: 48B %2:gpr32 = UBFMWri %1:gpr32, 31, 30 +# CHECK-DBG: 64B %3:gpr32 = SUBWrs %2:gpr32, %0:gpr32, 1 +# CHECK-DBG: 80B %4:gpr32 = UBFMWri %3:gpr32, 1, 31 +# CHECK-DBG: 96B %8:gpr32common = MOVi32imm 1 +# CHECK-DBG: 112B undef %7.sub_32:gpr64 = MOVi32imm 1 +# CHECK-DBG: 128B undef %7.sub_32:gpr64 = BFMWri %7.sub_32:gpr64(tied-def 0), %1:gpr32, 31, 30, implicit-def %7:gpr64 +# CHECK-DBG: 176B %8:gpr32common = BFMWri %8:gpr32common(tied-def 0), %4:gpr32, 30, 29 +# CHECK-DBG: 192B dead $wzr = SUBSWri %8:gpr32common, 0, 0, implicit-def $nzcv +# CHECK-DBG: 208B Bcc 2, %bb.2, implicit killed $nzcv +# CHECK-DBG: 224B B %bb.1 +# CHECK-DBG: 240B bb.1: +# CHECK-DBG: ; predecessors: %bb.0 +# CHECK-DBG: 256B %9:gpr64common = UBFMXri %7:gpr64, 62, 61 +# CHECK-DBG: 272B dead $xzr = LDRXui %9:gpr64common, 0 +# CHECK-DBG: 288B RET_ReallyLR +# CHECK-DBG: 304B bb.2: +# CHECK-DBG: ; predecessors: %bb.0 +# CHECK-DBG: 320B $x0 = COPY %7:gpr64 +# CHECK-DBG: 336B RET_ReallyLR implicit $x0 + +--- +name: reproducer3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + %0:gpr32 = COPY killed $w1 + %1:gpr32 = COPY killed $w0 + %3:gpr32 = UBFMWri %1, 31, 30 + %4:gpr32 = SUBWrs killed %3, killed %0, 1 + %5:gpr32 = UBFMWri killed %4, 1, 31 + %6:gpr32 = MOVi32imm 1 + %7:gpr32 = COPY %6 + %7:gpr32 = BFMWri %7, killed %1, 31, 30 + %8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32 + %9:gpr32common = COPY killed %6 + %9:gpr32common = BFMWri %9, killed %5, 30, 29 + dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv + Bcc 2, %bb.2, implicit killed $nzcv + B %bb.1 + + bb.1: + %10:gpr64common = UBFMXri killed %8, 62, 61 + dead $xzr = LDRXui killed %10, 0 + RET_ReallyLR + + bb.2: + $x0 = COPY killed %8 + RET_ReallyLR implicit killed $x0 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fabs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fabs.ll new file mode 100644 index 0000000000000..96cf528056cb1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fabs.ll @@ -0,0 +1,340 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12 %s + +define amdgpu_ps void @v_fabs_f16(half %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fabs_f16: +; GCN: ; %bb.0: +; GCN-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GCN-NEXT: global_store_b16 v[1:2], v0, off +; GCN-NEXT: s_endpgm + %fabs = call half @llvm.fabs.f16(half %in) + store half %fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_f16(half inreg %in, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_and_b32_e64 v2, 0x7fff, s0 +; GFX11-NEXT: global_store_b16 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_f16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_and_b32 s0, s0, 0x7fff +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b16 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fabs = call half @llvm.fabs.f16(half %in) + store half %fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_f16_salu_use(half inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_f16_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_and_b32_e64 v2, 0x7fff, s0 +; GFX11-NEXT: s_cmp_eq_u32 s1, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: s_cselect_b32 s0, s0, 0 +; GFX11-NEXT: v_mov_b32_e32 v2, s0 +; 
GFX11-NEXT: global_store_b16 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_f16_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_and_b32 s0, s0, 0x7fff +; GFX12-NEXT: s_cmp_eq_u32 s1, 0 +; GFX12-NEXT: s_cselect_b32 s0, s0, 0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b16 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fabs = call half @llvm.fabs.f16(half %in) + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, half %fabs, half 0.0 + store half %sel, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @v_fabs_f32(float %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fabs_f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 +; GCN-NEXT: global_store_b32 v[1:2], v0, off +; GCN-NEXT: s_endpgm + %fabs = call float @llvm.fabs.f32(float %in) + store float %fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_f32(float inreg %in, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_and_b32_e64 v2, 0x7fffffff, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_bitset0_b32 s0, 31 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fabs = call float @llvm.fabs.f32(float %in) + store float %fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_f32_salu_use(float inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_f32_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_and_b32_e64 v2, 0x7fffffff, s0 +; GFX11-NEXT: s_cmp_eq_u32 s1, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: s_cselect_b32 s0, s0, 0 +; GFX11-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; 
GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_f32_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_bitset0_b32 s0, 31 +; GFX12-NEXT: s_cmp_eq_u32 s1, 0 +; GFX12-NEXT: s_cselect_b32 s0, s0, 0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fabs = call float @llvm.fabs.f32(float %in) + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, float %fabs, float 0.0 + store float %sel, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @v_fabs_f64(double %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fabs_f64: +; GCN: ; %bb.0: +; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 +; GCN-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-NEXT: s_endpgm + %fabs = call double @llvm.fabs.f64(double %in) + store double %fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_f64(double inreg %in, ptr addrspace(1) %out) { +; GCN-LABEL: s_fabs_f64: +; GCN: ; %bb.0: +; GCN-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GCN-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3 +; GCN-NEXT: global_store_b64 v[0:1], v[2:3], off +; GCN-NEXT: s_endpgm + %fabs = call double @llvm.fabs.f64(double %in) + store double %fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_f64_salu_use(double inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_f64_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX11-NEXT: s_cmp_eq_u32 s2, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_readfirstlane_b32 s1, v3 +; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], 0 +; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; 
GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_f64_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX12-NEXT: s_cmp_eq_u32 s2, 0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v2 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_readfirstlane_b32 s1, v3 +; GFX12-NEXT: s_cselect_b64 s[0:1], s[0:1], 0 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX12-NEXT: s_endpgm + %fabs = call double @llvm.fabs.f64(double %in) + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, double %fabs, double 0.0 + store double %sel, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @v_fabs_v2f16(<2 x half> %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fabs_v2f16: +; GCN: ; %bb.0: +; GCN-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GCN-NEXT: global_store_b32 v[1:2], v0, off +; GCN-NEXT: s_endpgm + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) + store <2 x half> %fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_v2f16(<2 x half> inreg %in, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_v2f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_and_b32_e64 v2, 0x7fff7fff, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_v2f16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_lshr_b32 s1, s0, 16 +; GFX12-NEXT: s_and_b32 s0, s0, 0x7fff +; GFX12-NEXT: s_and_b32 s1, s1, 0x7fff +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) + store <2 x half> 
%fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_v2f16_salu_use(<2 x half> inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_v2f16_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_and_b32_e64 v2, 0x7fff7fff, s0 +; GFX11-NEXT: s_cmp_eq_u32 s1, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: s_cselect_b32 s0, s0, 0 +; GFX11-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_v2f16_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_lshr_b32 s2, s0, 16 +; GFX12-NEXT: s_and_b32 s0, s0, 0x7fff +; GFX12-NEXT: s_and_b32 s2, s2, 0x7fff +; GFX12-NEXT: s_cmp_eq_u32 s1, 0 +; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_cselect_b32 s0, s0, 0 +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, <2 x half> %fabs, <2 x half> + store <2 x half> %sel, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @v_fabs_v2f32(<2 x float> %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fabs_v2f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 +; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 +; GCN-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-NEXT: s_endpgm + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) + store <2 x float> %fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_v2f32(<2 x float> inreg %in, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_v2f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_and_b32_e64 v2, 0x7fffffff, s0 +; GFX11-NEXT: v_and_b32_e64 v3, 0x7fffffff, s1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 
+; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: v_readfirstlane_b32 s1, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_v2f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_bitset0_b32 s0, 31 +; GFX12-NEXT: s_bitset0_b32 s1, 31 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX12-NEXT: s_endpgm + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) + store <2 x float> %fabs, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_v2f32_salu_use(<2 x float> inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_v2f32_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_and_b32_e64 v2, 0x7fffffff, s0 +; GFX11-NEXT: v_and_b32_e64 v3, 0x7fffffff, s1 +; GFX11-NEXT: s_cmp_eq_u32 s2, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: v_readfirstlane_b32 s1, v3 +; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_v2f32_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_bitset0_b32 s0, 31 +; GFX12-NEXT: s_bitset0_b32 s1, 31 +; GFX12-NEXT: s_cmp_eq_u32 s2, 0 +; GFX12-NEXT: s_cselect_b64 s[0:1], s[0:1], 0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX12-NEXT: s_endpgm + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, <2 x float> %fabs, <2 x float> + store <2 x float> %sel, ptr addrspace(1) 
%out + ret void +} + +define amdgpu_ps void @v_fabs_fneg_f32(float %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fabs_fneg_f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_or_b32_e32 v0, 0x80000000, v0 +; GCN-NEXT: global_store_b32 v[1:2], v0, off +; GCN-NEXT: s_endpgm + %fabs = call float @llvm.fabs.f32(float %in) + %fneg = fneg float %fabs + store float %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fabs_fneg_f32(float inreg %in, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fabs_fneg_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_or_b32_e64 v2, 0x80000000, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fabs_fneg_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_bitset1_b32 s0, 31 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fabs = call float @llvm.fabs.f32(float %in) + %fneg = fneg float %fabs + store float %fneg, ptr addrspace(1) %out + ret void +} + +declare half @llvm.fabs.f16(half) +declare float @llvm.fabs.f32(float) +declare double @llvm.fabs.f64(double) +declare <2 x half> @llvm.fabs.v2f16(<2 x half>) +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fneg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fneg.ll new file mode 100644 index 0000000000000..8a260926d0a4f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fneg.ll @@ -0,0 +1,303 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12 %s + +define amdgpu_ps void @v_fneg_f16(half %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fneg_f16: +; GCN: ; %bb.0: +; GCN-NEXT: 
v_xor_b32_e32 v0, 0x8000, v0 +; GCN-NEXT: global_store_b16 v[1:2], v0, off +; GCN-NEXT: s_endpgm + %fneg = fneg half %in + store half %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fneg_f16(half inreg %in, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fneg_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_xor_b32_e64 v2, 0x8000, s0 +; GFX11-NEXT: global_store_b16 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fneg_f16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_xor_b32 s0, s0, 0x8000 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b16 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fneg = fneg half %in + store half %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fneg_f16_salu_use(half inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fneg_f16_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_xor_b32_e64 v2, 0x8000, s0 +; GFX11-NEXT: s_cmp_eq_u32 s1, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: s_cselect_b32 s0, s0, 0 +; GFX11-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-NEXT: global_store_b16 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fneg_f16_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_xor_b32 s0, s0, 0x8000 +; GFX12-NEXT: s_cmp_eq_u32 s1, 0 +; GFX12-NEXT: s_cselect_b32 s0, s0, 0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b16 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fneg = fneg half %in + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, half %fneg, half 0.0 + store half %sel, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @v_fneg_f32(float %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fneg_f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 +; GCN-NEXT: global_store_b32 v[1:2], v0, off +; GCN-NEXT: s_endpgm + %fneg = fneg float %in + 
store float %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fneg_f32(float inreg %in, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fneg_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_xor_b32_e64 v2, 0x80000000, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fneg_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_xor_b32 s0, s0, 0x80000000 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fneg = fneg float %in + store float %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fneg_f32_salu_use(float inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fneg_f32_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_xor_b32_e64 v2, 0x80000000, s0 +; GFX11-NEXT: s_cmp_eq_u32 s1, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: s_cselect_b32 s0, s0, 0 +; GFX11-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fneg_f32_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_xor_b32 s0, s0, 0x80000000 +; GFX12-NEXT: s_cmp_eq_u32 s1, 0 +; GFX12-NEXT: s_cselect_b32 s0, s0, 0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fneg = fneg float %in + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, float %fneg, float 0.0 + store float %sel, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @v_fneg_f64(double %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fneg_f64: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 +; GCN-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-NEXT: s_endpgm + %fneg = fneg double %in + store double %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void 
@s_fneg_f64(double inreg %in, ptr addrspace(1) %out) { +; GCN-LABEL: s_fneg_f64: +; GCN: ; %bb.0: +; GCN-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 +; GCN-NEXT: global_store_b64 v[0:1], v[2:3], off +; GCN-NEXT: s_endpgm + %fneg = fneg double %in + store double %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fneg_f64_salu_use(double inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fneg_f64_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX11-NEXT: s_cmp_eq_u32 s2, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_readfirstlane_b32 s1, v3 +; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], 0 +; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fneg_f64_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX12-NEXT: s_cmp_eq_u32 s2, 0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v2 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_readfirstlane_b32 s1, v3 +; GFX12-NEXT: s_cselect_b64 s[0:1], s[0:1], 0 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX12-NEXT: s_endpgm + %fneg = fneg double %in + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, double %fneg, double 0.0 + store double %sel, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @v_fneg_v2f16(<2 x 
half> %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fneg_v2f16: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GCN-NEXT: global_store_b32 v[1:2], v0, off +; GCN-NEXT: s_endpgm + %fneg = fneg <2 x half> %in + store <2 x half> %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fneg_v2f16(<2 x half> inreg %in, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fneg_v2f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_xor_b32_e64 v2, 0x80008000, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fneg_v2f16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_lshr_b32 s1, s0, 16 +; GFX12-NEXT: s_xor_b32 s0, s0, 0x8000 +; GFX12-NEXT: s_xor_b32 s1, s1, 0x8000 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fneg = fneg <2 x half> %in + store <2 x half> %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fneg_v2f16_salu_use(<2 x half> inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fneg_v2f16_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_xor_b32_e64 v2, 0x80008000, s0 +; GFX11-NEXT: s_cmp_eq_u32 s1, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: s_cselect_b32 s0, s0, 0 +; GFX11-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fneg_v2f16_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_lshr_b32 s2, s0, 16 +; GFX12-NEXT: s_xor_b32 s0, s0, 0x8000 +; GFX12-NEXT: s_xor_b32 s2, s2, 0x8000 +; GFX12-NEXT: s_cmp_eq_u32 s1, 0 +; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_cselect_b32 s0, s0, 0 +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; 
GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + %fneg = fneg <2 x half> %in + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, <2 x half> %fneg, <2 x half> + store <2 x half> %sel, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @v_fneg_v2f32(<2 x float> %in, ptr addrspace(1) %out) { +; GCN-LABEL: v_fneg_v2f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 +; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 +; GCN-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-NEXT: s_endpgm + %fneg = fneg <2 x float> %in + store <2 x float> %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fneg_v2f32(<2 x float> inreg %in, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fneg_v2f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_xor_b32_e64 v2, 0x80000000, s0 +; GFX11-NEXT: v_xor_b32_e64 v3, 0x80000000, s1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: v_readfirstlane_b32 s1, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fneg_v2f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_xor_b32 s0, s0, 0x80000000 +; GFX12-NEXT: s_xor_b32 s1, s1, 0x80000000 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX12-NEXT: s_endpgm + %fneg = fneg <2 x float> %in + store <2 x float> %fneg, ptr addrspace(1) %out + ret void +} +define amdgpu_ps void @s_fneg_v2f32_salu_use(<2 x float> inreg %in, i32 inreg %val, ptr addrspace(1) %out) { +; GFX11-LABEL: s_fneg_v2f32_salu_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_xor_b32_e64 v2, 0x80000000, s0 +; GFX11-NEXT: v_xor_b32_e64 v3, 0x80000000, s1 +; GFX11-NEXT: s_cmp_eq_u32 s2, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: v_readfirstlane_b32 s1, v3 +; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: s_fneg_v2f32_salu_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_xor_b32 s0, s0, 0x80000000 +; GFX12-NEXT: s_xor_b32 s1, s1, 0x80000000 +; GFX12-NEXT: s_cmp_eq_u32 s2, 0 +; GFX12-NEXT: s_cselect_b64 s[0:1], s[0:1], 0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX12-NEXT: s_endpgm + %fneg = fneg <2 x float> %in + %cond = icmp eq i32 %val, 0 + %sel = select i1 %cond, <2 x float> %fneg, <2 x float> + store <2 x float> %sel, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/BPF/atomic-oversize.ll b/llvm/test/CodeGen/BPF/atomic-oversize.ll index 187f0964d4fb8..6dc49398f091d 100644 --- a/llvm/test/CodeGen/BPF/atomic-oversize.ll +++ b/llvm/test/CodeGen/BPF/atomic-oversize.ll @@ -1,6 +1,4 @@ ; RUN: llc -mtriple=bpf < %s | FileCheck %s -; XFAIL: * -; Doesn't currently build, with error 'only small returns supported'. 
define void @test(ptr %a) nounwind { ; CHECK-LABEL: test: diff --git a/llvm/test/CodeGen/BPF/builtin_calls.ll b/llvm/test/CodeGen/BPF/builtin_calls.ll new file mode 100644 index 0000000000000..18199eba7222a --- /dev/null +++ b/llvm/test/CodeGen/BPF/builtin_calls.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=bpfel -mattr=+allow-builtin-calls < %s | FileCheck %s +; +; C code for this test case: +; +; long func(long a, long b) { +; long x; +; return __builtin_mul_overflow(a, b, &x); +; } + + +declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64) + +define noundef range(i64 0, 2) i64 @func(i64 noundef %a, i64 noundef %b) local_unnamed_addr { +entry: + %0 = tail call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %a, i64 %b) + %1 = extractvalue { i64, i1 } %0, 1 + %conv = zext i1 %1 to i64 + ret i64 %conv +} + +; CHECK-LABEL: func +; CHECK: r4 = r2 +; CHECK: r2 = r1 +; CHECK: r3 = r2 +; CHECK: r3 s>>= 63 +; CHECK: r5 = r4 +; CHECK: r5 s>>= 63 +; CHECK: r1 = r10 +; CHECK: r1 += -16 +; CHECK: call __multi3 +; CHECK: r1 = *(u64 *)(r10 - 16) +; CHECK: r1 s>>= 63 +; CHECK: w0 = 1 +; CHECK: r2 = *(u64 *)(r10 - 8) +; CHECK: if r2 != r1 goto LBB0_2 +; CHECK: # %bb.1: # %entry +; CHECK: w0 = 0 +; CHECK: LBB0_2: # %entry +; CHECK: exit \ No newline at end of file diff --git a/llvm/test/CodeGen/BPF/struct_ret1.ll b/llvm/test/CodeGen/BPF/struct_ret1.ll index 40d17ec514c48..eb66a7deacb91 100644 --- a/llvm/test/CodeGen/BPF/struct_ret1.ll +++ b/llvm/test/CodeGen/BPF/struct_ret1.ll @@ -1,6 +1,6 @@ ; RUN: not llc -mtriple=bpf < %s 2> %t1 ; RUN: FileCheck %s < %t1 -; CHECK: error: :0:0: in function bar { i64, i32 } (i32, i32, i32, i32, i32): aggregate returns are not supported +; CHECK: error: :0:0: in function bar { i64, i32 } (i32, i32, i32, i32, i32): stack arguments are not supported %struct.S = type { i32, i32, i32 } diff --git a/llvm/test/CodeGen/BPF/struct_ret2.ll b/llvm/test/CodeGen/BPF/struct_ret2.ll index 170d55cc29df0..a20280949215e 100644 --- 
a/llvm/test/CodeGen/BPF/struct_ret2.ll +++ b/llvm/test/CodeGen/BPF/struct_ret2.ll @@ -1,6 +1,6 @@ ; RUN: not llc -mtriple=bpf < %s 2> %t1 ; RUN: FileCheck %s < %t1 -; CHECK: only small returns +; CHECK: too many arguments ; Function Attrs: nounwind uwtable define { i64, i32 } @foo(i32 %a, i32 %b, i32 %c) #0 { diff --git a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll index 156a8e7c5c386..def886f933d08 100644 --- a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll +++ b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll @@ -11,9 +11,10 @@ define void @CSMain() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16 ; -; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2 -; SCHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16 -; SCHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16 +; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1 +; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [10 x [4 x i32]], ptr addrspace(3) [[GEP0]], i32 0, i32 2 +; SCHECK-NEXT: [[LOAD:%.*]] = load <4 x i32>, ptr addrspace(3) [[GEP1]], align 16 +; SCHECK-NEXT: store <4 x i32> [[LOAD]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16 ; ; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I14:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 1 ; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I25:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 2 @@ -40,12 +41,13 @@ define void @Main() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[BFRAGPACKED_I:%.*]] = alloca i32, align 16 ; -; SCHECK-NEXT: [[TMP0:%.*]] = 
getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1 -; SCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16 -; SCHECK-NEXT: store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16 +; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1 +; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) [[GEP0]], i32 0, i32 1 +; SCHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(3) [[GEP1]], align 16 +; SCHECK-NEXT: store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16 ; -; FCHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16 -; FCHECK-NEXT: store i32 [[TMP0]], ptr [[BFRAGPACKED_I]], align 16 +; FCHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16 +; FCHECK-NEXT: store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16 ; ; CHECK-NEXT: ret void entry: @@ -57,10 +59,12 @@ entry: define void @global_nested_geps_3d() { ; CHECK-LABEL: define void @global_nested_geps_3d() { -; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1 -; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1 +; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP0]], i32 0, i32 1 +; SCHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP1]], i32 0, i32 1 +; SCHECK-NEXT: load i32, ptr [[GEP2]], align 4 ; -; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 
0, i32 7), align 4 +; FCHECK-NEXT: load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4 ; ; CHECK-NEXT: ret void %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* @cTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4 @@ -69,10 +73,13 @@ define void @global_nested_geps_3d() { define void @global_nested_geps_4d() { ; CHECK-LABEL: define void @global_nested_geps_4d() { -; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x <2 x i32>]], ptr getelementptr inbounds ([2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1 -; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1 +; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr [[GEP0]], i32 0, i32 1 +; SCHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP1]], i32 0, i32 1 +; SCHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP2]], i32 0, i32 1 +; SCHECK-NEXT: load i32, ptr [[GEP3]], align 4 ; -; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4 +; FCHECK-NEXT: load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4 ; ; CHECK-NEXT: ret void %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* getelementptr inbounds ([2 x [2 x [2 x <2 x i32>]]], [2 x [2 x [2 x <2 x i32>]]]* @dTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), 
i32 0, i32 1), align 4 diff --git a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll index 475935d2eb135..85e3bb0185e44 100644 --- a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll +++ b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll @@ -48,7 +48,7 @@ define void @subtype_array_test() { ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4 ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4 ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]] @@ -64,7 +64,7 @@ define void @subtype_vector_test() { ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4 ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4 ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]] @@ -80,7 +80,7 @@ define void @subtype_scalar_test() { ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4 ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4 ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, 
ptr [[alloca_val]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]] diff --git a/llvm/test/CodeGen/DirectX/scalarize-global.ll b/llvm/test/CodeGen/DirectX/scalarize-global.ll index ca10f6ece5a85..c27dc4083bfd3 100644 --- a/llvm/test/CodeGen/DirectX/scalarize-global.ll +++ b/llvm/test/CodeGen/DirectX/scalarize-global.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: subtype_array_test define <4 x i32> @subtype_array_test() { ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]] @@ -26,7 +26,7 @@ define <4 x i32> @subtype_array_test() { ; CHECK-LABEL: subtype_vector_test define <4 x i32> @subtype_vector_test() { ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]] @@ -41,7 +41,7 @@ define <4 x i32> @subtype_vector_test() { ; CHECK-LABEL: subtype_scalar_test define <4 x i32> @subtype_scalar_test() { ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: 
[[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(3) [[arrayofVecData]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]] diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll index d09ef0e2c6ac0..c12dbe488263f 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll @@ -1532,11 +1532,11 @@ define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float ; CHECK-NEXT: # kill: def $f7 killed $f7 def $vr7 ; CHECK-NEXT: # kill: def $f6 killed $f6 def $vr6 ; CHECK-NEXT: # kill: def $f5 killed $f5 def $vr5 -; CHECK-NEXT: # kill: def $f4 killed $f4 def $xr4 +; CHECK-NEXT: # kill: def $f4 killed $f4 def $vr4 def $xr4 ; CHECK-NEXT: # kill: def $f3 killed $f3 def $vr3 ; CHECK-NEXT: # kill: def $f2 killed $f2 def $vr2 ; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1 -; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0 ; CHECK-NEXT: vextrins.w $vr4, $vr5, 16 ; CHECK-NEXT: vextrins.w $vr4, $vr6, 32 ; CHECK-NEXT: vextrins.w $vr4, $vr7, 48 @@ -1619,7 +1619,7 @@ define void @buildvector_v8f32_subseq_2(ptr %dst, float %a0, float %a1, float %a ; CHECK-NEXT: # kill: def $f3 killed $f3 def $vr3 ; CHECK-NEXT: # kill: def $f2 killed $f2 def $vr2 ; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1 -; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0 ; CHECK-NEXT: vextrins.w $vr0, $vr1, 16 ; CHECK-NEXT: vextrins.w $vr0, $vr2, 32 ; CHECK-NEXT: vextrins.w $vr0, $vr3, 48 @@ -1643,7 +1643,7 @@ define void 
@buildvector_v8f32_subseq_4(ptr %dst, float %a0, float %a1) nounwind ; CHECK-LABEL: buildvector_v8f32_subseq_4: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1 -; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0 ; CHECK-NEXT: vextrins.w $vr0, $vr1, 16 ; CHECK-NEXT: xvreplve0.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 @@ -1665,9 +1665,9 @@ define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, dou ; CHECK-LABEL: buildvector_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $f3_64 killed $f3_64 def $vr3 -; CHECK-NEXT: # kill: def $f2_64 killed $f2_64 def $xr2 +; CHECK-NEXT: # kill: def $f2_64 killed $f2_64 def $vr2 def $xr2 ; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $vr1 -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0 ; CHECK-NEXT: vextrins.d $vr2, $vr3, 16 ; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 @@ -1722,7 +1722,7 @@ define void @buildvector_v4f64_subseq_2(ptr %dst, double %a0, double %a1) nounwi ; CHECK-LABEL: buildvector_v4f64_subseq_2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $vr1 -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0 ; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: xvreplve0.q $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 diff --git a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll index 45b25013c9173..d01848e8547b6 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll @@ -22,7 +22,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind { ; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl __powisf2 -; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0 +; 
LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0 ; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload ; LA32-NEXT: vextrins.w $vr0, $vr1, 16 ; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill @@ -56,7 +56,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind { ; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl __powisf2 -; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0 +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0 ; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload ; LA32-NEXT: vextrins.w $vr0, $vr1, 16 ; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill @@ -105,7 +105,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0 ; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload ; LA64-NEXT: vextrins.w $vr0, $vr1, 16 ; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill @@ -143,7 +143,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0 ; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload ; LA64-NEXT: vextrins.w $vr0, $vr1, 16 ; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill @@ -198,7 +198,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind { ; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl __powidf2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0 ; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload ; LA32-NEXT: vextrins.d $vr0, $vr1, 16 ; LA32-NEXT: xvst $xr0, $sp, 32 # 
32-byte Folded Spill @@ -214,7 +214,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind { ; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl __powidf2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0 ; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload ; LA32-NEXT: vextrins.d $vr0, $vr1, 16 ; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload @@ -244,7 +244,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0 ; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload ; LA64-NEXT: vextrins.d $vr0, $vr1, 16 ; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill @@ -262,7 +262,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0 ; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload ; LA64-NEXT: vextrins.d $vr0, $vr1, 16 ; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll index bba269279937a..be5d42bdfb975 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll @@ -49,7 +49,7 @@ define <4 x i64> @scalar_to_4xi64(i64 %val) { define <8 x float> @scalar_to_8xf32(float %val) { ; CHECK-LABEL: scalar_to_8xf32: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0 ; CHECK-NEXT: ret %ret = 
insertelement <8 x float> poison, float %val, i32 0 ret <8 x float> %ret @@ -58,7 +58,7 @@ define <8 x float> @scalar_to_8xf32(float %val) { define <4 x double> @scalar_to_4xf64(double %val) { ; CHECK-LABEL: scalar_to_4xf64: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0 ; CHECK-NEXT: ret %ret = insertelement <4 x double> poison, double %val, i32 0 ret <4 x double> %ret diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll index afc7a39e18dc8..aae23265710ce 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -750,21 +750,25 @@ entry: define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) { ; CHECK-64-LABEL: testDoubleImm1: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-64-NEXT: xxpermdi 34, 1, 34, 1 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testDoubleImm1: ; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1 ; CHECK-32-NEXT: blr ; ; CHECK-64-P10-LABEL: testDoubleImm1: ; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1 ; CHECK-64-P10-NEXT: blr ; ; CHECK-32-P10-LABEL: testDoubleImm1: ; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1 ; CHECK-32-P10-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll index fb55511162a7e..4f965780ab95e 100644 --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -1754,7 +1754,11 @@ entry: define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) { ; P9BE-LABEL: fromRegsConvdtoi: ; 
P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 ; P9BE-NEXT: xxmrghd vs0, vs2, vs4 +; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9BE-NEXT: xvcvdpsxws v2, vs0 ; P9BE-NEXT: xxmrghd vs0, vs1, vs3 ; P9BE-NEXT: xvcvdpsxws v3, vs0 @@ -1763,7 +1767,11 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) { ; ; P9LE-LABEL: fromRegsConvdtoi: ; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 ; P9LE-NEXT: xvcvdpsxws v2, vs0 ; P9LE-NEXT: xxmrghd vs0, vs4, vs2 ; P9LE-NEXT: xvcvdpsxws v3, vs0 @@ -1772,6 +1780,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) { ; ; P8BE-LABEL: fromRegsConvdtoi: ; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8BE-NEXT: xxmrghd vs0, vs2, vs4 ; P8BE-NEXT: xxmrghd vs1, vs1, vs3 ; P8BE-NEXT: xvcvdpsxws v2, vs0 @@ -1781,6 +1793,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) { ; ; P8LE-LABEL: fromRegsConvdtoi: ; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8LE-NEXT: xxmrghd vs0, vs3, vs1 ; P8LE-NEXT: xxmrghd vs1, vs4, vs2 ; P8LE-NEXT: xvcvdpsxws v2, vs0 @@ -3240,7 +3256,11 @@ entry: define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) { ; P9BE-LABEL: fromRegsConvdtoui: ; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: 
def $f4 killed $f4 def $vsl4 +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 ; P9BE-NEXT: xxmrghd vs0, vs2, vs4 +; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9BE-NEXT: xvcvdpuxws v2, vs0 ; P9BE-NEXT: xxmrghd vs0, vs1, vs3 ; P9BE-NEXT: xvcvdpuxws v3, vs0 @@ -3249,7 +3269,11 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) ; ; P9LE-LABEL: fromRegsConvdtoui: ; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 ; P9LE-NEXT: xvcvdpuxws v2, vs0 ; P9LE-NEXT: xxmrghd vs0, vs4, vs2 ; P9LE-NEXT: xvcvdpuxws v3, vs0 @@ -3258,6 +3282,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) ; ; P8BE-LABEL: fromRegsConvdtoui: ; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8BE-NEXT: xxmrghd vs0, vs2, vs4 ; P8BE-NEXT: xxmrghd vs1, vs1, vs3 ; P8BE-NEXT: xvcvdpuxws v2, vs0 @@ -3267,6 +3295,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) ; ; P8LE-LABEL: fromRegsConvdtoui: ; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8LE-NEXT: xxmrghd vs0, vs3, vs1 ; P8LE-NEXT: xxmrghd vs1, vs4, vs2 ; P8LE-NEXT: xvcvdpuxws v2, vs0 @@ -4540,24 +4572,32 @@ entry: define <2 x i64> @fromRegsConvdtoll(double %a, double %b) { ; P9BE-LABEL: fromRegsConvdtoll: ; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: # kill: def $f1 killed $f1 
def $vsl1 ; P9BE-NEXT: xxmrghd vs0, vs1, vs2 ; P9BE-NEXT: xvcvdpsxds v2, vs0 ; P9BE-NEXT: blr ; ; P9LE-LABEL: fromRegsConvdtoll: ; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9LE-NEXT: xxmrghd vs0, vs2, vs1 ; P9LE-NEXT: xvcvdpsxds v2, vs0 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromRegsConvdtoll: ; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8BE-NEXT: xxmrghd vs0, vs1, vs2 ; P8BE-NEXT: xvcvdpsxds v2, vs0 ; P8BE-NEXT: blr ; ; P8LE-LABEL: fromRegsConvdtoll: ; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8LE-NEXT: xxmrghd vs0, vs2, vs1 ; P8LE-NEXT: xvcvdpsxds v2, vs0 ; P8LE-NEXT: blr @@ -5694,24 +5734,32 @@ entry: define <2 x i64> @fromRegsConvdtoull(double %a, double %b) { ; P9BE-LABEL: fromRegsConvdtoull: ; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9BE-NEXT: xxmrghd vs0, vs1, vs2 ; P9BE-NEXT: xvcvdpuxds v2, vs0 ; P9BE-NEXT: blr ; ; P9LE-LABEL: fromRegsConvdtoull: ; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9LE-NEXT: xxmrghd vs0, vs2, vs1 ; P9LE-NEXT: xvcvdpuxds v2, vs0 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromRegsConvdtoull: ; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8BE-NEXT: xxmrghd vs0, vs1, vs2 ; P8BE-NEXT: xvcvdpuxds v2, vs0 ; P8BE-NEXT: blr ; ; P8LE-LABEL: fromRegsConvdtoull: ; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8LE-NEXT: xxmrghd vs0, vs2, vs1 ; P8LE-NEXT: xvcvdpuxds v2, vs0 ; P8LE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll 
b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index 7f6fdc7f88cd1..b40fbc3e16873 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -562,6 +562,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea ; CHECK-P8-NEXT: bl dummy ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: xxlxor f0, f0, f0 +; CHECK-P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 @@ -576,6 +577,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea ; CHECK-P9-NEXT: bl dummy ; CHECK-P9-NEXT: nop ; CHECK-P9-NEXT: xxlxor f0, f0, f0 +; CHECK-P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0 ; CHECK-P9-NEXT: stxv vs0, 0(r30) ; @@ -589,6 +591,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea ; CHECK-P9-BE-NEXT: bl dummy ; CHECK-P9-BE-NEXT: nop ; CHECK-P9-BE-NEXT: xxlxor f0, f0, f0 +; CHECK-P9-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-P9-BE-NEXT: xxmrghd vs0, vs0, vs1 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r30) ; @@ -615,6 +618,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea ; CHECK-P7-NEXT: bl dummy ; CHECK-P7-NEXT: nop ; CHECK-P7-NEXT: xxlxor f0, f0, f0 +; CHECK-P7-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-P7-NEXT: xxmrghd vs0, vs1, vs0 ; CHECK-P7-NEXT: xxswapd vs0, vs0 ; CHECK-P7-NEXT: stxvd2x vs0, 0, r30 @@ -629,6 +633,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea ; P8-AIX-64-NEXT: bl .dummy[PR] ; P8-AIX-64-NEXT: nop ; P8-AIX-64-NEXT: xxlxor f0, f0, f0 +; P8-AIX-64-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8-AIX-64-NEXT: xxmrghd vs0, vs0, vs1 ; P8-AIX-64-NEXT: stxvd2x vs0, 0, r31 ; @@ -642,6 +647,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea ; 
P8-AIX-32-NEXT: bl .dummy[PR] ; P8-AIX-32-NEXT: nop ; P8-AIX-32-NEXT: xxlxor f0, f0, f0 +; P8-AIX-32-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8-AIX-32-NEXT: xxmrghd vs0, vs0, vs1 ; P8-AIX-32-NEXT: stxvd2x vs0, 0, r31 test_entry: diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll index 04af0947c7a33..a72abf7007e8d 100644 --- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll +++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll @@ -6,6 +6,7 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) { ; CHECK-LABEL: fneg_fdiv_splat: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxspltd 0, 1, 0 ; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l ; CHECK-NEXT: xvredp 1, 0 diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll index eac4fb6f98bf7..4519cf4101f42 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -229,6 +229,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp ; P8-NEXT: xscvspdpn f1, vs0 ; P8-NEXT: bl nearbyintf ; P8-NEXT: nop +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8-NEXT: xxmrghd vs0, vs1, v30 ; P8-NEXT: xscvspdpn f1, v31 ; P8-NEXT: xvcvdpsp v29, vs0 @@ -239,6 +240,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp ; P8-NEXT: xscvspdpn f1, vs0 ; P8-NEXT: bl nearbyintf ; P8-NEXT: nop +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8-NEXT: xxmrghd vs0, v30, vs1 ; P8-NEXT: li r3, 160 ; P8-NEXT: xvcvdpsp v2, vs0 @@ -276,6 +278,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp ; P9-NEXT: xscvspdpn f1, vs0 ; P9-NEXT: bl nearbyintf ; P9-NEXT: nop +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9-NEXT: xxmrghd vs0, vs1, v30 ; P9-NEXT: xscvspdpn f1, v31 ; P9-NEXT: xvcvdpsp v29, vs0 @@ -286,6 +289,7 @@ 
define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp ; P9-NEXT: xscvspdpn f1, vs0 ; P9-NEXT: bl nearbyintf ; P9-NEXT: nop +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9-NEXT: xxmrghd vs0, v30, vs1 ; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload ; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload @@ -326,6 +330,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric ; P8-NEXT: bl nearbyint ; P8-NEXT: nop ; P8-NEXT: li r3, 144 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8-NEXT: xxmrghd v2, v30, vs1 ; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; P8-NEXT: li r3, 128 @@ -354,6 +359,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric ; P9-NEXT: xxswapd vs1, v31 ; P9-NEXT: bl nearbyint ; P9-NEXT: nop +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P9-NEXT: xxmrghd v2, v30, vs1 ; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload ; P9-NEXT: lxv v30, 32(r1) # 16-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/frem.ll b/llvm/test/CodeGen/PowerPC/frem.ll index 19b4b1c9cdf95..21cb206ac43bb 100644 --- a/llvm/test/CodeGen/PowerPC/frem.ll +++ b/llvm/test/CodeGen/PowerPC/frem.ll @@ -70,6 +70,7 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: xscvspdpn 2, 0 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxmrghd 0, 1, 61 ; CHECK-NEXT: xscvspdpn 1, 62 ; CHECK-NEXT: xscvspdpn 2, 63 @@ -83,6 +84,7 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: xscvspdpn 2, 0 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxmrghd 0, 61, 1 ; CHECK-NEXT: lxv 63, 80(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 62, 64(1) # 16-byte Folded Reload @@ -124,6 +126,7 @@ define <2 x double> @frem2x64(<2 x double> %a, <2 x double> %b) { ; CHECK-NEXT: xxswapd 2, 63 ; CHECK-NEXT: bl fmod ; CHECK-NEXT: nop +; 
CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxmrghd 34, 61, 1 ; CHECK-NEXT: lxv 63, 64(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 62, 48(1) # 16-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll index 238e200bfc782..3ae0b02f79e27 100644 --- a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll +++ b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll @@ -41,39 +41,47 @@ define void @test(ptr %p1, ptr %p2) nounwind { ; CHECK-NEXT: xxswapd 61, 63 ; CHECK-NEXT: bl roundeven ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxswapd 56, 1 ; CHECK-NEXT: xxlor 1, 59, 59 ; CHECK-NEXT: bl roundeven ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxswapd 0, 1 ; CHECK-NEXT: xxlor 1, 60, 60 ; CHECK-NEXT: xxmrgld 59, 0, 56 ; CHECK-NEXT: bl roundeven ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxswapd 60, 1 ; CHECK-NEXT: xxlor 1, 62, 62 ; CHECK-NEXT: bl roundeven ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxswapd 0, 1 ; CHECK-NEXT: xxlor 1, 61, 61 ; CHECK-NEXT: xxmrgld 62, 0, 60 ; CHECK-NEXT: bl roundeven ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxswapd 61, 1 ; CHECK-NEXT: xxlor 1, 63, 63 ; CHECK-NEXT: bl roundeven ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxswapd 0, 1 ; CHECK-NEXT: xxlor 1, 57, 57 ; CHECK-NEXT: xxmrgld 63, 0, 61 ; CHECK-NEXT: bl roundeven ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxswapd 61, 1 ; CHECK-NEXT: xxlor 1, 58, 58 ; CHECK-NEXT: bl roundeven ; CHECK-NEXT: nop ; CHECK-NEXT: li 3, 160 ; CHECK-NEXT: stxvd2x 63, 30, 29 +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxswapd 0, 1 ; CHECK-NEXT: stxvd2x 62, 30, 28 ; CHECK-NEXT: stxvd2x 59, 30, 27 diff --git 
a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll index 903ea691ae6ba..8b5b7962da33f 100644 --- a/llvm/test/CodeGen/PowerPC/half.ll +++ b/llvm/test/CodeGen/PowerPC/half.ll @@ -1365,6 +1365,7 @@ define <4 x float> @test_extend32_vec4(ptr %p) nounwind { ; P8-NEXT: bl __extendhfsf2 ; P8-NEXT: nop ; P8-NEXT: li r3, 80 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; P8-NEXT: xxmrghd vs0, vs61, vs1 ; P8-NEXT: xxmrghd vs1, vs63, vs62 ; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll index 8d7253b5ce8e3..23748bca0b7b2 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp.ll @@ -107,6 +107,7 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) nounwind { ; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxmrghd vs0, vs1, v29 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: vextuwrx r3, r3, v31 @@ -123,6 +124,7 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) nounwind { ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxmrghd vs0, vs1, v29 ; CHECK-NEXT: lxv v31, 80(r1) # 16-byte Folded Reload ; CHECK-NEXT: lxv v30, 64(r1) # 16-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll index 1b137c786cc91..203b3bd15490a 100644 --- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll +++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll @@ -294,6 +294,7 @@ define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) { ; CHECK-NEXT: addi r4, r1, 40 ; CHECK-NEXT: bl modf ; CHECK-NEXT: nop +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxmrghd v2, v30, vs1 ; CHECK-NEXT: lfd f0, 32(r1) ; CHECK-NEXT: lfd f1, 40(r1) diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll 
b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll index b006c78604648..0364166a1b29e 100644 --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -928,21 +928,25 @@ entry: define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) { ; CHECK-LABEL: testDoubleImm1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxmrghd v2, v2, vs1 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testDoubleImm1: ; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-BE-NEXT: xxpermdi v2, vs1, v2, 1 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testDoubleImm1: ; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-P9-NEXT: xxpermdi v2, vs1, v2, 1 ; CHECK-P9-NEXT: blr ; ; AIX-P8-LABEL: testDoubleImm1: ; AIX-P8: # %bb.0: # %entry +; AIX-P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; AIX-P8-NEXT: xxpermdi v2, vs1, v2, 1 ; AIX-P8-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 08ca1d153248e..e6307aa2906da 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -107,6 +107,10 @@ entry: define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double> %y) #0 { ; PC64LE-LABEL: constrained_vector_fdiv_v3f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 5, 4 ; PC64LE-NEXT: xxmrghd 1, 2, 1 ; PC64LE-NEXT: xsdivdp 3, 3, 6 @@ -116,6 +120,10 @@ define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double> ; ; PC64LE9-LABEL: constrained_vector_fdiv_v3f64: ; PC64LE9: # %bb.0: # %entry +; 
PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 5, 4 ; PC64LE9-NEXT: xxmrghd 1, 2, 1 ; PC64LE9-NEXT: xsdivdp 3, 3, 6 @@ -209,6 +217,7 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double> ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -239,6 +248,7 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double> ; PC64LE9-NEXT: xxswapd 2, 63 ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 61, 1 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload @@ -390,6 +400,7 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double> ; PC64LE-NEXT: fmr 2, 30 ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 29 ; PC64LE-NEXT: fmr 2, 31 @@ -431,6 +442,7 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double> ; PC64LE9-NEXT: fmr 2, 30 ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 29 ; PC64LE9-NEXT: fmr 2, 31 @@ -486,6 +498,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double> ; PC64LE-NEXT: xxswapd 2, 62 ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 59, 1 ; PC64LE-NEXT: xxlor 1, 61, 61 ; PC64LE-NEXT: xxlor 2, 63, 63 @@ -498,6 +511,7 @@ define <4 x double> 
@constrained_vector_frem_v4f64(<4 x double> %x, <4 x double> ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 112 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 60, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 96 @@ -536,6 +550,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double> ; PC64LE9-NEXT: xxswapd 2, 62 ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 59, 1 ; PC64LE9-NEXT: xscpsgndp 1, 61, 61 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 @@ -546,6 +561,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double> ; PC64LE9-NEXT: xxswapd 2, 63 ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 60, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload @@ -670,6 +686,10 @@ entry: define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double> %y) #0 { ; PC64LE-LABEL: constrained_vector_fmul_v3f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 5, 4 ; PC64LE-NEXT: xxmrghd 1, 2, 1 ; PC64LE-NEXT: xsmuldp 3, 3, 6 @@ -679,6 +699,10 @@ define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double> ; ; PC64LE9-LABEL: constrained_vector_fmul_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 5, 4 ; PC64LE9-NEXT: xxmrghd 1, 2, 1 ; PC64LE9-NEXT: xsmuldp 3, 3, 6 @@ -820,6 +844,10 @@ entry: define 
<3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double> %y) #0 { ; PC64LE-LABEL: constrained_vector_fadd_v3f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 5, 4 ; PC64LE-NEXT: xxmrghd 1, 2, 1 ; PC64LE-NEXT: xsadddp 3, 3, 6 @@ -829,6 +857,10 @@ define <3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double> ; ; PC64LE9-LABEL: constrained_vector_fadd_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 5, 4 ; PC64LE9-NEXT: xxmrghd 1, 2, 1 ; PC64LE9-NEXT: xsadddp 3, 3, 6 @@ -970,6 +1002,10 @@ entry: define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double> %y) #0 { ; PC64LE-LABEL: constrained_vector_fsub_v3f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 5, 4 ; PC64LE-NEXT: xxmrghd 1, 2, 1 ; PC64LE-NEXT: xssubdp 3, 3, 6 @@ -979,6 +1015,10 @@ define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double> ; ; PC64LE9-LABEL: constrained_vector_fsub_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 5, 4 ; PC64LE9-NEXT: xxmrghd 1, 2, 1 ; PC64LE9-NEXT: xssubdp 3, 3, 6 @@ -1105,6 +1145,8 @@ entry: define <3 x double> 
@constrained_vector_sqrt_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_sqrt_v3f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 2, 1 ; PC64LE-NEXT: xssqrtdp 3, 3 ; PC64LE-NEXT: xvsqrtdp 2, 0 @@ -1113,6 +1155,8 @@ define <3 x double> @constrained_vector_sqrt_v3f64(<3 x double> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_sqrt_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 2, 1 ; PC64LE9-NEXT: xssqrtdp 3, 3 ; PC64LE9-NEXT: xvsqrtdp 2, 0 @@ -1203,6 +1247,7 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double> ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -1233,6 +1278,7 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double> ; PC64LE9-NEXT: xxswapd 2, 63 ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 61, 1 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload @@ -1384,6 +1430,7 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double> ; PC64LE-NEXT: fmr 2, 30 ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 29 ; PC64LE-NEXT: fmr 2, 31 @@ -1425,6 +1472,7 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double> ; PC64LE9-NEXT: fmr 2, 30 ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 29 ; 
PC64LE9-NEXT: fmr 2, 31 @@ -1480,6 +1528,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double> ; PC64LE-NEXT: xxswapd 2, 62 ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 59, 1 ; PC64LE-NEXT: xxlor 1, 61, 61 ; PC64LE-NEXT: xxlor 2, 63, 63 @@ -1492,6 +1541,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double> ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 112 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 60, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 96 @@ -1530,6 +1580,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double> ; PC64LE9-NEXT: xxswapd 2, 62 ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 59, 1 ; PC64LE9-NEXT: xscpsgndp 1, 61, 61 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 @@ -1540,6 +1591,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double> ; PC64LE9-NEXT: xxswapd 2, 63 ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 60, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload @@ -1618,6 +1670,7 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 { ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: ld 30, 80(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload @@ -1647,6 +1700,7 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 { ; PC64LE9-NEXT: mr 4, 30 ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; 
PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -1790,6 +1844,7 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 { ; PC64LE-NEXT: mr 4, 30 ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: mr 4, 30 @@ -1828,6 +1883,7 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 { ; PC64LE9-NEXT: mr 4, 30 ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: mr 4, 30 @@ -1878,6 +1934,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 { ; PC64LE-NEXT: mr 4, 30 ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: mr 4, 30 @@ -1890,6 +1947,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: ld 30, 96(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload @@ -1923,6 +1981,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 { ; PC64LE9-NEXT: mr 4, 30 ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: mr 4, 30 @@ -1933,6 +1992,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 { ; PC64LE9-NEXT: mr 4, 30 ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 
; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -2003,6 +2063,7 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 { ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 @@ -2027,6 +2088,7 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -2149,6 +2211,7 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 { ; PC64LE-NEXT: fmr 1, 30 ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: bl sin @@ -2181,6 +2244,7 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 { ; PC64LE9-NEXT: fmr 1, 30 ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: bl sin @@ -2224,6 +2288,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: xxswapd 1, 62 ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl sin @@ -2234,6 +2299,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ 
-2262,6 +2328,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 62 ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: bl sin @@ -2270,6 +2337,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -2338,6 +2406,7 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 { ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 @@ -2362,6 +2431,7 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -2484,6 +2554,7 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 { ; PC64LE-NEXT: fmr 1, 30 ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: bl cos @@ -2516,6 +2587,7 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 { ; PC64LE9-NEXT: fmr 1, 30 ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: bl cos @@ -2559,6 +2631,7 @@ define <4 x double> 
@constrained_vector_cos_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: xxswapd 1, 62 ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl cos @@ -2569,6 +2642,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -2597,6 +2671,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 62 ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: bl cos @@ -2605,6 +2680,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -2673,6 +2749,7 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 { ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 @@ -2697,6 +2774,7 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -2819,6 +2897,7 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 
x double> %x) #0 { ; PC64LE-NEXT: fmr 1, 30 ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: bl exp @@ -2851,6 +2930,7 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 { ; PC64LE9-NEXT: fmr 1, 30 ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: bl exp @@ -2894,6 +2974,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: xxswapd 1, 62 ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl exp @@ -2904,6 +2985,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -2932,6 +3014,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 62 ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: bl exp @@ -2940,6 +3023,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -3008,6 +3092,7 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 { ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed 
$f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 @@ -3032,6 +3117,7 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -3154,6 +3240,7 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 { ; PC64LE-NEXT: fmr 1, 30 ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: bl exp2 @@ -3186,6 +3273,7 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 { ; PC64LE9-NEXT: fmr 1, 30 ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: bl exp2 @@ -3229,6 +3317,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: xxswapd 1, 62 ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl exp2 @@ -3239,6 +3328,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -3267,6 +3357,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 62 ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: 
xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: bl exp2 @@ -3275,6 +3366,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -3343,6 +3435,7 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 { ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 @@ -3367,6 +3460,7 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -3489,6 +3583,7 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 { ; PC64LE-NEXT: fmr 1, 30 ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: bl log @@ -3521,6 +3616,7 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 { ; PC64LE9-NEXT: fmr 1, 30 ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: bl log @@ -3564,6 +3660,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: xxswapd 1, 62 ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl log @@ -3574,6 +3671,7 @@ define <4 x 
double> @constrained_vector_log_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -3602,6 +3700,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 62 ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: bl log @@ -3610,6 +3709,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -3678,6 +3778,7 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 { ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 @@ -3702,6 +3803,7 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -3824,6 +3926,7 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 { ; PC64LE-NEXT: fmr 1, 30 ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: bl log10 @@ -3856,6 +3959,7 @@ define <3 x double> 
@constrained_vector_log10_v3f64(<3 x double> %x) #0 { ; PC64LE9-NEXT: fmr 1, 30 ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: bl log10 @@ -3899,6 +4003,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: xxswapd 1, 62 ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl log10 @@ -3909,6 +4014,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -3937,6 +4043,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 62 ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: bl log10 @@ -3945,6 +4052,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -4013,6 +4121,7 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 { ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 @@ -4037,6 +4146,7 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 { ; PC64LE9-NEXT: 
xxswapd 1, 63 ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -4159,6 +4269,7 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 { ; PC64LE-NEXT: fmr 1, 30 ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: bl log2 @@ -4191,6 +4302,7 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 { ; PC64LE9-NEXT: fmr 1, 30 ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: bl log2 @@ -4234,6 +4346,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: xxswapd 1, 62 ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl log2 @@ -4244,6 +4357,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -4272,6 +4386,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 62 ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: bl log2 @@ -4280,6 +4395,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: 
def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -4387,6 +4503,8 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 { define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_rint_v3f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 2, 1 ; PC64LE-NEXT: xsrdpic 3, 3 ; PC64LE-NEXT: xvrdpic 2, 0 @@ -4395,6 +4513,8 @@ define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_rint_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 2, 1 ; PC64LE9-NEXT: xsrdpic 3, 3 ; PC64LE9-NEXT: xvrdpic 2, 0 @@ -4479,6 +4599,7 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 { ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 @@ -4503,6 +4624,7 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -4625,6 +4747,7 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 { ; PC64LE-NEXT: fmr 1, 30 ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: bl nearbyint @@ -4657,6 +4780,7 @@ define <3 
x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 { ; PC64LE9-NEXT: fmr 1, 30 ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: bl nearbyint @@ -4700,6 +4824,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: xxswapd 1, 62 ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl nearbyint @@ -4710,6 +4835,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -4738,6 +4864,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 62 ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: bl nearbyint @@ -4746,6 +4873,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -4927,6 +5055,10 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double> ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -64(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 
def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 5, 4 ; PC64LE-NEXT: xxmrghd 1, 2, 1 ; PC64LE-NEXT: std 0, 80(1) @@ -4950,6 +5082,10 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double> ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 5, 4 ; PC64LE9-NEXT: xxmrghd 1, 2, 1 ; PC64LE9-NEXT: std 0, 64(1) @@ -5159,6 +5295,10 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double> ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -64(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 5, 4 ; PC64LE-NEXT: xxmrghd 1, 2, 1 ; PC64LE-NEXT: std 0, 80(1) @@ -5182,6 +5322,10 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double> ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5 +; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 5, 4 ; PC64LE9-NEXT: xxmrghd 1, 2, 1 ; PC64LE9-NEXT: std 0, 64(1) @@ -6520,6 +6664,8 @@ entry: define <3 x double> @constrained_vector_ceil_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_ceil_v3f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 2, 1 ; PC64LE-NEXT: xsrdpip 3, 3 ; PC64LE-NEXT: xvrdpip 2, 0 @@ -6528,6 +6674,8 @@ define <3 x double> 
@constrained_vector_ceil_v3f64(<3 x double> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_ceil_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 2, 1 ; PC64LE9-NEXT: xsrdpip 3, 3 ; PC64LE9-NEXT: xvrdpip 2, 0 @@ -6628,6 +6776,8 @@ entry: define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_floor_v3f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 2, 1 ; PC64LE-NEXT: xsrdpim 3, 3 ; PC64LE-NEXT: xvrdpim 2, 0 @@ -6636,6 +6786,8 @@ define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_floor_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 2, 1 ; PC64LE9-NEXT: xsrdpim 3, 3 ; PC64LE9-NEXT: xvrdpim 2, 0 @@ -6736,6 +6888,8 @@ entry: define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_round_v3f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 2, 1 ; PC64LE-NEXT: xsrdpi 3, 3 ; PC64LE-NEXT: xvrdpi 2, 0 @@ -6744,6 +6898,8 @@ define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_round_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 2, 1 ; PC64LE9-NEXT: xsrdpi 3, 3 ; PC64LE9-NEXT: xvrdpi 2, 0 @@ -6843,6 +6999,8 @@ entry: define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_trunc_v3f64: ; PC64LE: # %bb.0: # %entry +; 
PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 0, 2, 1 ; PC64LE-NEXT: xsrdpiz 3, 3 ; PC64LE-NEXT: xvrdpiz 2, 0 @@ -6851,6 +7009,8 @@ define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_trunc_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 0, 2, 1 ; PC64LE9-NEXT: xsrdpiz 3, 3 ; PC64LE9-NEXT: xvrdpiz 2, 0 @@ -8049,6 +8209,7 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 { ; PC64LE-NEXT: bl tan ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 @@ -8073,6 +8234,7 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl tan ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload @@ -8195,6 +8357,7 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 { ; PC64LE-NEXT: fmr 1, 30 ; PC64LE-NEXT: bl tan ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 31 ; PC64LE-NEXT: bl tan @@ -8227,6 +8390,7 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 { ; PC64LE9-NEXT: fmr 1, 30 ; PC64LE9-NEXT: bl tan ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: bl tan @@ -8270,6 +8434,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: xxswapd 1, 62 ; PC64LE-NEXT: bl tan ; 
PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 61, 1 ; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl tan @@ -8280,6 +8445,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 { ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -8308,6 +8474,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 62 ; PC64LE9-NEXT: bl tan ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 61, 1 ; PC64LE9-NEXT: xscpsgndp 1, 63, 63 ; PC64LE9-NEXT: bl tan @@ -8316,6 +8483,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 { ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: bl tan ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 61, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload @@ -8390,6 +8558,7 @@ define <2 x double> @constrained_vector_atan2_v2f64(<2 x double> %x, <2 x double ; PC64LE-NEXT: bl atan2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 34, 61, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 64 @@ -8420,6 +8589,7 @@ define <2 x double> @constrained_vector_atan2_v2f64(<2 x double> %x, <2 x double ; PC64LE9-NEXT: xxswapd 2, 63 ; PC64LE9-NEXT: bl atan2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 34, 61, 1 ; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload @@ -8571,6 +8741,7 @@ define <3 x double> @constrained_vector_atan2_v3f64(<3 x double> %x, <3 x double ; PC64LE-NEXT: fmr 2, 30 ; PC64LE-NEXT: bl atan2 ; 
PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: fmr 1, 29 ; PC64LE-NEXT: fmr 2, 31 @@ -8612,6 +8783,7 @@ define <3 x double> @constrained_vector_atan2_v3f64(<3 x double> %x, <3 x double ; PC64LE9-NEXT: fmr 2, 30 ; PC64LE9-NEXT: bl atan2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 1, 29 ; PC64LE9-NEXT: fmr 2, 31 @@ -8667,6 +8839,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double ; PC64LE-NEXT: xxswapd 2, 62 ; PC64LE-NEXT: bl atan2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 62, 59, 1 ; PC64LE-NEXT: xxlor 1, 61, 61 ; PC64LE-NEXT: xxlor 2, 63, 63 @@ -8679,6 +8852,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 112 ; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: xxmrghd 35, 60, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 96 @@ -8717,6 +8891,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double ; PC64LE9-NEXT: xxswapd 2, 62 ; PC64LE9-NEXT: bl atan2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 62, 59, 1 ; PC64LE9-NEXT: xscpsgndp 1, 61, 61 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 @@ -8727,6 +8902,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double ; PC64LE9-NEXT: xxswapd 2, 63 ; PC64LE9-NEXT: bl atan2 ; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: xxmrghd 35, 60, 1 ; PC64LE9-NEXT: vmr 2, 30 ; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload diff --git a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll new file mode 100644 index 
0000000000000..ea7454faad218 --- /dev/null +++ b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s + +%struct.wibble = type { %struct.wombat } +%struct.wombat = type { %struct.ham, [3 x i8] } +%struct.ham = type { %struct.zot } +%struct.zot = type { %struct.blam } +%struct.blam = type { %struct.ham.0 } +%struct.ham.0 = type { %struct.bar } +%struct.bar = type { %struct.bar.1 } +%struct.bar.1 = type { %struct.baz, i8 } +%struct.baz = type { %struct.snork } +%struct.snork = type <{ %struct.spam, i8, [3 x i8] }> +%struct.spam = type { %struct.snork.2, %struct.snork.2 } +%struct.snork.2 = type { i32 } +%struct.snork.3 = type { %struct.baz, i8, [3 x i8] } + +define void @foo(ptr %arg, ptr %arg1, i40 %arg2, ptr %arg3, i32 %arg4) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: movl %r8d, %r14d +; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq %rsi, %r13 +; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: # implicit-def: $r12 +; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %bb17 +; CHECK-NEXT: # in Loop: 
Header=BB0_3 Depth=1 +; CHECK-NEXT: movq %r15, %r13 +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: testq %rbx, %rbx +; CHECK-NEXT: sete %r15b +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq _Znwm@PLT +; CHECK-NEXT: shll $4, %r15d +; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; CHECK-NEXT: movq %r12, %rcx +; CHECK-NEXT: shrq $32, %rcx +; CHECK-NEXT: movb %cl, 12(%rax) +; CHECK-NEXT: movl %r12d, 8(%rax) +; CHECK-NEXT: movq %r15, %rbx +; CHECK-NEXT: movq %r13, %r15 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; CHECK-NEXT: decl %r14d +; CHECK-NEXT: je .LBB0_8 +; CHECK-NEXT: .LBB0_3: # %bb7 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: callq widget@PLT +; CHECK-NEXT: cmpb $-5, (%r13) +; CHECK-NEXT: jae .LBB0_5 +; CHECK-NEXT: # %bb.4: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movl %r12d, %r12d +; CHECK-NEXT: cmpq %r15, %rbx +; CHECK-NEXT: jbe .LBB0_1 +; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_5: # %bb12 +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movq 0, %rax +; CHECK-NEXT: movq 8, %rax +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; CHECK-NEXT: cmpq %r15, %rbx +; CHECK-NEXT: jbe .LBB0_1 +; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: decl %r14d +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: .LBB0_8: # %bb21 +; CHECK-NEXT: cmpb $0, 12(%rax) +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.9: # %bb26 +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_10: # %bb25 +; CHECK-NEXT: .cfi_def_cfa %rbp, 16 +; CHECK-NEXT: movq %r15, %rdi +; CHECK-NEXT: callq pluto@PLT +bb: + br label %bb7 + +bb5: ; preds = %bb17, %bb14 + %phi = phi ptr [ %call19, 
%bb17 ], [ null, %bb14 ] + %phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb14 ] + %add = add i32 %phi9, 1 + %icmp = icmp eq i32 %phi9, %arg4 + br i1 %icmp, label %bb21, label %bb7 + +bb7: ; preds = %bb5, %bb + %phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ] + %phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ] + %phi10 = phi i40 [ poison, %bb ], [ %phi15, %bb5 ] + %call = call ptr @widget() + %load = load i8, ptr %arg1, align 8 + %icmp11 = icmp ult i8 %load, -5 + %and = and i40 %phi10, 4294967295 + br i1 %icmp11, label %bb14, label %bb12 + +bb12: ; preds = %bb7 + %load13 = load volatile { i64, i64 }, ptr null, align 4294967296 + br label %bb14 + +bb14: ; preds = %bb12, %bb7 + %phi15 = phi i40 [ %and, %bb7 ], [ %arg2, %bb12 ] + %icmp16 = icmp ugt ptr %phi8, %arg + br i1 %icmp16, label %bb5, label %bb17 + +bb17: ; preds = %bb14 + %icmp18 = icmp eq ptr %phi8, null + %zext = zext i1 %icmp18 to i64 + %call19 = call ptr @_Znwm(i64 0) + %getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext + %getelementptr20 = getelementptr i8, ptr %call19, i64 8 + store i40 %phi15, ptr %getelementptr20, align 4 + br label %bb5 + +bb21: ; preds = %bb5 + %getelementptr22 = getelementptr %struct.snork.3, ptr %phi, i64 0, i32 1 + %load23 = load i8, ptr %getelementptr22, align 4 + %icmp24 = icmp eq i8 %load23, 0 + br i1 %icmp24, label %bb26, label %bb25 + +bb25: ; preds = %bb21 + call void @pluto(ptr %arg) + unreachable + +bb26: ; preds = %bb21 + ret void +} + +define void @eggs(ptr %arg, ptr %arg1) { +; CHECK-LABEL: eggs: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %r8d, %r8d +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +bb: + call void @foo(ptr %arg1, ptr %arg, i40 0, ptr null, i32 0) + ret void +} + 
+declare ptr @widget() + +declare void @pluto(ptr) + +declare ptr @_Znwm(i64) + +attributes #0 = { noinline "frame-pointer"="all" } diff --git a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir index 8241a1757af52..0bc208dc709d7 100644 --- a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir +++ b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines --- name: rematerialize_subreg_to_reg_added_impdef_1 tracksRegLiveness: true @@ -9,7 +9,7 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x2aaaaaab), %bb.2(0x55555555) ; CHECK-NEXT: liveins: $edi ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]] ; CHECK-NEXT: JCC_1 %bb.2, 5, implicit killed undef $eflags ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -28,7 +28,7 @@ body: | ; CHECK-NEXT: JCC_1 %bb.5, 5, implicit killed undef $eflags ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al + ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al, implicit-def $al ; CHECK-NEXT: RET 0, killed undef $al ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: diff --git a/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir new file mode 100644 index 
0000000000000..2e6395f065e25 --- /dev/null +++ b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir @@ -0,0 +1,44 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -enable-subreg-liveness -verify-coalescing -o - %s | FileCheck %s + +--- +name: requires_new_subrange_coalesce_subreg_to_reg +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: requires_new_subrange_coalesce_subreg_to_reg + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = COPY $eax + ; CHECK-NEXT: %b:gr32 = IMPLICIT_DEF + ; CHECK-NEXT: %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit + ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit undef $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: %c.sub_32bit:gr64 = COPY %a + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %c.sub_32bit:gr64 = SUBREG_TO_REG %a, %b, %subreg.sub_32bit + ; CHECK-NEXT: RET 0, implicit %c + bb.0: + liveins: $eax + %init_eax:gr32 = COPY $eax + %a:gr64 = SUBREG_TO_REG 0, %init_eax, %subreg.sub_32bit + %b:gr32 = IMPLICIT_DEF + %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit + JCC_1 %bb.2, 4, implicit undef $eflags + + bb.1: + %imm0:gr32 = MOV32r0 implicit-def dead $eflags + %a = SUBREG_TO_REG 0, %imm0, %subreg.sub_32bit + %c.sub_32bit = COPY %a + + bb.2: + %c.sub_32bit = SUBREG_TO_REG %a, %b, %subreg.sub_32bit + RET 0, implicit %c + +... 
diff --git a/llvm/test/CodeGen/X86/pr76416.ll b/llvm/test/CodeGen/X86/pr76416.ll new file mode 100644 index 0000000000000..68e9ef9c87f6e --- /dev/null +++ b/llvm/test/CodeGen/X86/pr76416.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; +; Reproducer from https://github.com/llvm/llvm-project/issues/76416 +; + +@load_p = external global ptr, align 8 +@load_data = external global i8, align 1 + +define dso_local void @pr76416() { +; CHECK-LABEL: pr76416: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: jg .LBB0_3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: incl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: jle .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %for.end +; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq load_p@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq load_data@GOTPCREL(%rip), %rcx +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_4: # %for.cond1 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movq (%rax), %rdx +; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rsi +; CHECK-NEXT: movzbl (%rdx,%rsi), %edx +; CHECK-NEXT: movb %dl, (%rcx) +; CHECK-NEXT: leal 1(%rsi), %edx +; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: jmp .LBB0_4 +entry: + %alloca = alloca i32, align 4 + store i32 0, ptr %alloca, align 4 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %load.from.alloca.0 = load i32, ptr %alloca, align 4 + %cmp = icmp slt i32 %load.from.alloca.0, 4 + br i1 %cmp, label 
%for.body, label %for.end + +for.body: ; preds = %for.cond + call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) nounwind + %load.from.alloca.1 = load i32, ptr %alloca, align 4 + %inc = add nsw i32 %load.from.alloca.1, 1 + store i32 %inc, ptr %alloca, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + store i32 0, ptr %alloca, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.cond1, %for.end + call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) nounwind + %load.from.load_p = load ptr, ptr @load_p, align 8 + %regs = getelementptr inbounds { [4 x i8] }, ptr %load.from.load_p, i32 0, i32 0 + %load.from.alloca.2 = load i32, ptr %alloca, align 4 + %idxprom = sext i32 %load.from.alloca.2 to i64 + %arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom + %load.with.gep.ptr = load i8, ptr %arrayidx, align 1 + store i8 %load.with.gep.ptr, ptr @load_data, align 1 + %load.from.alloca.3 = load i32, ptr %alloca, align 4 + %inc2 = add nsw i32 %load.from.alloca.3, 1 + store i32 %inc2, ptr %alloca, align 4 + br label %for.cond1 +} diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir b/llvm/test/CodeGen/X86/subreg-fail.mir index c8146f099b814..dc690719e8581 100644 --- a/llvm/test/CodeGen/X86/subreg-fail.mir +++ b/llvm/test/CodeGen/X86/subreg-fail.mir @@ -14,8 +14,8 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test1 - ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`) - ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`) + ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm]] :: (volatile load (s32) from `ptr undef`) + ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, 
$noreg, 0, $noreg, implicit-def [[MOV32rm1]] :: (volatile load (s32) from `ptr undef`) ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir new file mode 100644 index 0000000000000..ff946b76e8f61 --- /dev/null +++ b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir @@ -0,0 +1,451 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines + +# We cannot lose the liveness of the high subregister of %1 when +# coalesced with %0, so introduce an implicit-def of the super +# register on the MOV. 
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64 +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64 + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = MOV32r0 implicit-def dead $eflags + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... 
+ +--- +name: subreg_to_reg_folds_to_undef +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $rax + + ; CHECK-LABEL: name: subreg_to_reg_folds_to_undef + ; CHECK: liveins: $rax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_with_sub_8bit = COPY $rax + ; CHECK-NEXT: undef [[MOV32rr:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rr [[COPY]].sub_32bit, implicit-def [[MOV32rr]] + ; CHECK-NEXT: RET 0, implicit [[MOV32rr]] + %0:gr64 = COPY killed $rax + %1:gr32 = COPY killed %0.sub_32bit + %2:gr32 = MOV32rr killed %1 + %3:gr64 = SUBREG_TO_REG 0, killed %2, %subreg.sub_32bit + %4:gr64 = COPY killed %3 + RET 0, implicit %4 + +... + +--- +name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64 +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64 + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags + %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def 
dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... + +--- +name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64 +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64 + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0 + %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... 
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_]].sub_8bit, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit [[MOV32r0_]] + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef %0.sub_8bit + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + INLINEASM &"", 0, implicit %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... 
+ + +# Reduced realistic case which was asserting after introducing new implicit-defs +--- +name: coalesce_needs_implicit_defs +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_needs_implicit_defs + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $rdi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: undef [[MOV32r0_1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: TEST64rr [[MOV32r0_1]], [[MOV32r0_1]], implicit-def $eflags + ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32r0_2]], 4, implicit-def dead $eflags + ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[MOV32r0_2]], [[COPY]], implicit-def dead $eflags + ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr64_with_sub_8bit = COPY [[MOV32r0_2]] + ; CHECK-NEXT: JMP_1 %bb.1 + bb.0: + liveins: $rdi + + %0:gr64 = COPY killed $rdi 
+ %1:gr32 = MOV32r0 implicit-def dead $eflags + %2:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit + %3:gr64 = COPY killed %2 + + bb.1: + %4:gr64 = COPY killed %3 + %5:gr32 = MOV32r0 implicit-def dead $eflags + TEST64rr killed %4, %4, implicit-def $eflags + %6:gr8 = SETCCr 4, implicit killed $eflags + %7:gr32 = COPY killed %5 + %7.sub_8bit:gr32 = COPY killed %6 + %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %9:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit + $rdi = COPY %9 + CALL64r killed %9, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %10:gr64 = COPY killed %8 + %10:gr64 = SHL64ri %10, 4, implicit-def dead $eflags + %11:gr64 = COPY killed %10 + %11:gr64 = ADD64rr %11, %0, implicit-def dead $eflags + %3:gr64 = COPY killed %11 + JMP_1 %bb.1 + +... + +# Make sure to add the 'undef' flag to the result register %2, +# because the top 32bits are not defined. 
+--- +name: coalesce_add_implicitdef_and_undef +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_add_implicitdef_and_undef + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $eflags, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = ADD32ri [[COPY]].sub_32bit, -34, implicit-def $eflags, implicit-def [[COPY]] + ; CHECK-NEXT: FAKE_USE [[COPY]] + ; CHECK-NEXT: RET 0 + bb.0: + liveins: $eflags, $edx + %0:gr32 = COPY $edx + JMP_1 %bb.1 + + bb.1: + %1:gr32 = COPY %0 + %1:gr32 = ADD32ri %1, -34, implicit-def $eflags + %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %1, %subreg.sub_32bit + FAKE_USE %2 + RET 0 +... + +# We can't mark the destination register as 'undef' or add implicit-def +# because the top 24 bits of %0:gr32 are retained by the SUBREG_TO_REG. +# +# For example, if this were to result in: +# +# undef %2.sub_32bit:gr64_with_sub_8bit = COPY $edx +# %1:gr8 = SETCCr 4, implicit $eflags +# JMP_1 %bb.1 +# +# bb.1: +# undef %2.sub_8bit:gr64_with_sub_8bit = COPY %1, implicit-def %2 +# +# Then this says that the top 56 bits of %2 are undef. That's not correct +# because only the top 32 bits are undef. 
+--- +name: coalesce_dont_add_implicitdef_or_undef +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_dont_add_implicitdef_or_undef + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $eflags, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx + ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: [[COPY:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = COPY [[SETCCr]] + ; CHECK-NEXT: FAKE_USE [[COPY]] + ; CHECK-NEXT: RET 0 + bb.0: + liveins: $eflags, $edx + %0:gr32 = COPY $edx + %1:gr8 = SETCCr 4, implicit killed $eflags + JMP_1 %bb.1 + + bb.1: + %0.sub_8bit:gr32 = COPY %1 + %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + FAKE_USE %2 + RET 0 +... + +--- +name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi + ; CHECK-NEXT: CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = MOV32r0 implicit-def dead $eflags + $rdi = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, 
implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... + +--- +name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $eax + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use + ; CHECK: liveins: $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, $eax, %subreg.sub_32bit + ; CHECK-NEXT: $rdi = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: CALL64r [[SUBREG_TO_REG]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $eax = MOV32r0 implicit-def dead $eflags + %1:gr64 = SUBREG_TO_REG 0, killed $eax, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... 
+ +# Coalesced instruction is a copy with other implicit operands +--- +name: coalesce_copy_into_subreg_to_reg64 +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $eax + ; CHECK-LABEL: name: coalesce_copy_into_subreg_to_reg64 + ; CHECK: liveins: $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $eax, implicit-def dead $eflags, implicit-def [[COPY]] + ; CHECK-NEXT: $rdi = COPY [[COPY]] + ; CHECK-NEXT: CALL64r [[COPY]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = COPY $eax, implicit-def dead $eflags + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... 
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef [[MOV32r0_]].sub_32bit, implicit [[MOV32r0_]].sub_32bit, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: $rdi = COPY [[MOV32r0_]] + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = MOV32r0 implicit-def dead $eflags + INLINEASM &"", 0, implicit-def %0, implicit %0 + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... 
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1 + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags + ; CHECK-NEXT: JMP_1 %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $rdi = COPY %1 + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + bb.0: + INLINEASM &"", 0, implicit-def %0:gr32 + JCC_1 %bb.1, 4, implicit undef $eflags + JMP_1 %bb.2 + + bb.1: + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + $rdi = COPY %1 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + + bb.2: + +... 
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1 + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags + ; CHECK-NEXT: JMP_1 %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $rdi = COPY %1 + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + bb.0: + + INLINEASM &"", 0, implicit-def %0:gr32 + JCC_1 %bb.1, 4, implicit undef $eflags + JMP_1 %bb.2 + + bb.1: + %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32bit + $rdi = COPY %1 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + JMP_1 %bb.1 + + bb.2: + +... 
diff --git a/llvm/test/Transforms/Coroutines/declare-value.ll b/llvm/test/Transforms/Coroutines/declare-value.ll new file mode 100644 index 0000000000000..94049c28169b9 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/declare-value.ll @@ -0,0 +1,68 @@ +;RUN: opt -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s + +; CHECK: %.debug = alloca double, align 8 +; CHECK-NEXT: #dbg_declare(ptr %{{.*}}, !{{[0-9]+}}, !DIExpression(DW_OP_deref), !{{[0-9]+}}) +; CHECK-NEXT: store double %{{[0-9]+}}, ptr %{{.*}}, align 8 +; CHECK-NEXT: #dbg_declare(ptr %arg, !{{[0-9]+}}, !DIExpression(DW_OP_plus_uconst, 24), !{{[0-9]+}}) + +; ModuleID = '/Users/srastogi/Development/llvm-project-2/llvm/test/Transforms/Coroutines/declare-value.ll' +source_filename = "/Users/srastogi/Development/llvm-project-2/llvm/test/Transforms/Coroutines/declare-value.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-unknown" + +@coroutineATu = global <{ i32, i32 }> <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @coroutineA to i64), i64 ptrtoint (ptr @coroutineATu to i64)) to i32), i32 16 }>, align 8 + +; Function Attrs: presplitcoroutine +define swifttailcc void @coroutineA(ptr swiftasync %arg, double %0) #0 !dbg !1 { + %var_with_dbg_value = alloca ptr, align 8 + %var_with_dbg_declare = alloca ptr, align 8 + #dbg_declare(ptr %var_with_dbg_declare, !5, !DIExpression(), !7) + #dbg_declare_value(double %0, !5, !DIExpression(), !7) + %i2 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, ptr nonnull @coroutineATu) + %i3 = call ptr @llvm.coro.begin(token %i2, ptr null) + %i7 = call ptr @llvm.coro.async.resume(), !dbg !7 + %i10 = call { ptr } (i32, ptr, ptr, ...) 
@llvm.coro.suspend.async.sl_p0s(i32 0, ptr %i7, ptr nonnull @__swift_async_resume_get_context, ptr nonnull @coroutineA.1, ptr %i7, i64 0, i64 0, ptr %arg), !dbg !7 + call void @dont_optimize(ptr %var_with_dbg_value, ptr %var_with_dbg_declare), !dbg !7 + unreachable, !dbg !7 +} + +define weak_odr hidden ptr @__swift_async_resume_get_context(ptr %arg) !dbg !8 { + ret ptr %arg, !dbg !9 +} + +define hidden swifttailcc void @coroutineA.1(ptr %arg, i64 %arg1, i64 %arg2, ptr %arg3) !dbg !10 { + ret void, !dbg !11 +} + +declare void @dont_optimize(ptr, ptr) + +; Function Attrs: nomerge nounwind +declare ptr @llvm.coro.async.resume() #1 + +; Function Attrs: nounwind +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 + +; Function Attrs: nounwind +declare token @llvm.coro.id.async(i32, i32, i32, ptr) #2 + +; Function Attrs: nomerge nounwind +declare { ptr } @llvm.coro.suspend.async.sl_p0s(i32, ptr, ptr, ...) #1 + +attributes #0 = { presplitcoroutine } +attributes #1 = { nomerge nounwind } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram(scope: null, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4) +!2 = distinct !DICompileUnit(language: DW_LANG_Swift, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!3 = !DIFile(filename: "blah", directory: "") +!4 = !{} +!5 = !DILocalVariable(scope: !1, type: !6) +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Klass") +!7 = !DILocation(line: 0, scope: !1) +!8 = distinct !DISubprogram(scope: null, spFlags: DISPFlagDefinition, unit: !2) +!9 = !DILocation(line: 0, scope: !8) +!10 = distinct !DISubprogram(scope: null, spFlags: DISPFlagDefinition, unit: !2) +!11 = !DILocation(line: 0, scope: !10) \ No newline at end of file diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index 212a5c99676f4..877484f5159fd 
100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ -63,7 +63,7 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) { ; CHECK-NEXT: store i32 [[STORE]], ptr [[NBRBOXES]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[IV]], [[IBOX]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -114,7 +114,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; RVA23-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; RVA23-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; RVA23: middle.block: ; RVA23-NEXT: br label [[LOOP:%.*]] ; RVA23: exit: @@ -141,7 +141,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; RVA23ZVL1024B-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; RVA23ZVL1024B: middle.block: ; RVA23ZVL1024B-NEXT: br label [[LOOP:%.*]] ; RVA23ZVL1024B: exit: @@ -185,16 +185,16 @@ define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr no ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], [[VEC_IND]] ; 
CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0( [[BROADCAST_SPLAT1]], align 8 [[TMP5]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P3:%.*]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, ptr [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0( zeroinitializer, align 4 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0( zeroinitializer, align 4 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0( zeroinitializer, align 1 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll new file mode 100644 index 0000000000000..6746e92cc1fd1 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=2 \ +; RUN: -force-target-supports-scalable-vectors=true \ +; RUN: -scalable-vectorization=preferred -S %s | FileCheck %s + +define void @widengep_narrow(ptr %in, ptr noalias %p) { +; CHECK-LABEL: define void @widengep_narrow( +; CHECK-SAME: ptr [[IN:%.*]], ptr noalias [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[IN]], i64 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, [[BROADCAST_SPLAT2]], 
[[VEC_IND]] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement [[TMP7]], i32 [[TMP10]] +; CHECK-NEXT: store ptr [[TMP11]], ptr [[P]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.in.off = getelementptr i8, ptr %in, i64 8 + %gep.in.iv = getelementptr i32, ptr %gep.in.off, i64 %iv + store ptr %gep.in.iv, ptr %p + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1024 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll b/llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll new file mode 100644 index 0000000000000..00eeb69dcb0f7 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll @@ -0,0 +1,201 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-interleaved-mem-accesses -S | FileCheck %s + +define void @pr128062(ptr %dst.start, i8 %a, i16 %b) { +; CHECK-LABEL: define void @pr128062( +; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; 
CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255) +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[STRIDED_VEC3]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255) +; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x 
i8> [[BROADCAST_SPLAT2]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255) +; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255) +; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %dst = phi ptr [ %dst.start, %entry ], [ %dst.next, %loop ] + %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4 + %load.dst = load i8, ptr %dst, align 1 + %dst.ext = zext i8 %load.dst to i16 + %mul.dst.0 = mul nuw i16 %dst.ext, %b + %udiv.0 = udiv i16 %mul.dst.0, 255 + %trunc.0 = trunc nuw i16 %udiv.0 
to i8 + %val.0 = add i8 %a, %trunc.0 + store i8 %val.0, ptr %dst, align 1 + %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1 + %load.dst.1 = load i8, ptr %gep.dst.1, align 1 + %dst.1.ext = zext i8 %load.dst.1 to i16 + %mul.dst.1 = mul nuw i16 %dst.1.ext, %b + %udiv.1 = udiv i16 %mul.dst.1, 255 + %trunc.1 = trunc nuw i16 %udiv.1 to i8 + %val.1 = add i8 %a, %trunc.1 + store i8 %val.1, ptr %gep.dst.1, align 1 + %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2 + %load.dst.2 = load i8, ptr %gep.dst.2, align 1 + %dst.2.ext = zext i8 %load.dst.2 to i16 + %mul.dst.2 = mul nuw i16 %dst.2.ext, %b + %udiv.2 = udiv i16 %mul.dst.2, 255 + %trunc.2 = trunc nuw i16 %udiv.2 to i8 + %val.2 = add i8 %a, %trunc.2 + store i8 %val.2, ptr %gep.dst.2, align 1 + %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3 + %load.dst.3 = load i8, ptr %gep.dst.3, align 1 + %dst.3.ext = zext i8 %load.dst.3 to i16 + %mul.dst.3 = mul nuw i16 %dst.3.ext, %b + %udiv.3 = udiv i16 %mul.dst.3, 255 + %trunc.3 = trunc nuw i16 %udiv.3 to i8 + %val.3 = add i8 %a, %trunc.3 + store i8 %val.3, ptr %gep.dst.3, align 1 + %iv.next = add i64 %iv, 4 + %exit.cond = icmp eq i64 %iv.next, 256 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} + +; Same as above, except one zext is replaced with an sext. 
+define void @opcode_mismatch(ptr %dst.start, i8 %a, i16 %b) { +; CHECK-LABEL: define void @opcode_mismatch( +; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255) +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[STRIDED_VEC3]] to <4 x i16> +; 
CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255) +; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255) +; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255) +; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %dst = phi ptr [ %dst.start, %entry ], [ %dst.next, 
%loop ] + %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4 + %load.dst = load i8, ptr %dst, align 1 + %dst.ext = zext i8 %load.dst to i16 + %mul.dst.0 = mul nuw i16 %dst.ext, %b + %udiv.0 = udiv i16 %mul.dst.0, 255 + %trunc.0 = trunc nuw i16 %udiv.0 to i8 + %val.0 = add i8 %a, %trunc.0 + store i8 %val.0, ptr %dst, align 1 + %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1 + %load.dst.1 = load i8, ptr %gep.dst.1, align 1 + %dst.1.ext = sext i8 %load.dst.1 to i16 + %mul.dst.1 = mul nuw i16 %dst.1.ext, %b + %udiv.1 = udiv i16 %mul.dst.1, 255 + %trunc.1 = trunc nuw i16 %udiv.1 to i8 + %val.1 = add i8 %a, %trunc.1 + store i8 %val.1, ptr %gep.dst.1, align 1 + %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2 + %load.dst.2 = load i8, ptr %gep.dst.2, align 1 + %dst.2.ext = zext i8 %load.dst.2 to i16 + %mul.dst.2 = mul nuw i16 %dst.2.ext, %b + %udiv.2 = udiv i16 %mul.dst.2, 255 + %trunc.2 = trunc nuw i16 %udiv.2 to i8 + %val.2 = add i8 %a, %trunc.2 + store i8 %val.2, ptr %gep.dst.2, align 1 + %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3 + %load.dst.3 = load i8, ptr %gep.dst.3, align 1 + %dst.3.ext = zext i8 %load.dst.3 to i16 + %mul.dst.3 = mul nuw i16 %dst.3.ext, %b + %udiv.3 = udiv i16 %mul.dst.3, 255 + %trunc.3 = trunc nuw i16 %udiv.3 to i8 + %val.3 = add i8 %a, %trunc.3 + store i8 %val.3, ptr %gep.dst.3, align 1 + %iv.next = add i64 %iv, 4 + %exit.cond = icmp eq i64 %iv.next, 256 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index 9bb010c0431d8..90ef97609e096 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -8,14 +8,14 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: 
[[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 1 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[OFFSET_IDX]] -; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP1]], align 8 +; CHECK-NEXT: store <4 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -55,11 +55,11 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC]], align 8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[N]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, <4 x ptr> [[BROADCAST_SPLAT]], i64 [[N]] ; CHECK-NEXT: [[TMP2:%.*]] = 
getelementptr ptr, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP2]], align 8 +; CHECK-NEXT: store <4 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map b/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map index 50d860290422c..bd2b2014ee22c 100644 --- a/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map +++ b/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map @@ -11,9 +11,13 @@ objects: - filename: 1.o symbols: - { sym: _bar, objAddr: 0x0, binAddr: 0x10000, size: 0x10 } + - { sym: __Z13lib1_internalv, objAddr: 0x0, binAddr: 0x10020, size: 0x20 } + - { sym: __ZN3Foo4funcIZ13lib1_internalvE3$_0EEvv, objAddr: 0x0, binAddr: 0x10040, size: 0x20 } - filename: 2.o symbols: - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x20000, size: 0x10 } + - { sym: __Z13lib1_internalv, objAddr: 0x0, binAddr: 0x20020, size: 0x20 } + - { sym: __ZN3Foo4funcIZ13lib1_internalvE3$_0EEvv, objAddr: 0x0, binAddr: 0x20040, size: 0x20 } - filename: 3.o symbols: - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x30000, size: 0x10 } diff --git a/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test b/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test index c0c4fe835682f..c5110a873c603 100644 --- a/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test +++ b/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test @@ -98,7 +98,7 @@ CHECK: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x000000 CHECK: DW_AT_comp_dir [DW_FORM_strx] (indexed (00000004) string = "/Users/shubham/Development/test109275485") CHECK: DW_TAG_subprogram -CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x0000000000010000) 
+CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x0000000000010040) CHECK: DW_AT_linkage_name [DW_FORM_strx] (indexed (00000005) string = "_Z4foo2i") CHECK: DW_AT_name [DW_FORM_strx] (indexed (00000006) string = "foo2") diff --git a/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c b/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c index d2d36f675e8b7..b89a6f99ebcb3 100644 --- a/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c +++ b/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c @@ -10,10 +10,10 @@ int bar(int a) { return foo(a); } // RUN: llvm-dwarfdump - | FileCheck %s // CHECK: DW_TAG_subprogram -// CHECK: DW_AT_low_pc{{.*}}0x0000000000010000 +// CHECK: DW_AT_low_pc{{.*}}0x0000000000010040 // CHECK: DW_AT_name{{.*}}"bar" // CHECK-NOT: NULL // CHECK: DW_TAG_inlined_subroutine // CHECK-NEXT: DW_AT_abstract_origin{{.*}}"foo" -// CHECK-NEXT: DW_AT_low_pc{{.*}}0x0000000000010000 +// CHECK-NEXT: DW_AT_low_pc{{.*}}0x0000000000010040 diff --git a/llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test b/llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test new file mode 100644 index 0000000000000..b6edb8bca3194 --- /dev/null +++ b/llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test @@ -0,0 +1,28 @@ +# Tests the case where a DW_TAG_subprogram for a method declaration +# got uniqued into a DW_TAG_subprogram with the same linkage name (but +# different DW_AT_name). Make sure the DW_TAG_subprogram DIE for the +# definition, which previously pointed to the now de-deduplicated declaration, +# gets inserted into the .debug_names table using the DW_AT_name of the canonical +# declaration DW_TAG_subprogram. 
+# +# Object files compiled as follows: +# clang -g -c -o 1.o Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp +# clang -g -c -o 2.o Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp + +# RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -o - \ +# RUN: | llvm-dwarfdump --verify - | FileCheck %s + +# RUN: dsymutil --linker parallel -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -o - \ +# RUN: | not llvm-dwarfdump --verify - | FileCheck %s --check-prefix=PARALLEL-ODR + +# RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -no-odr -o - \ +# RUN: | llvm-dwarfdump --verify - | FileCheck %s + +# RUN: dsymutil --linker parallel -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -no-odr -o - \ +# RUN: | llvm-dwarfdump --verify - | FileCheck %s + +# CHECK: No errors. + +# FIXME: parallel DWARFLinker uses wrong DW_AT_name when inserting uniqued subprogram into .debug_names +# PARALLEL-ODR: Verifying .debug_names... 
+# PARALLEL-ODR-NEXT: error: Name Index {{.*}} mismatched Name of DIE diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/1.o b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/1.o new file mode 100644 index 0000000000000..5932a3c89aaeb Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/1.o differ diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/2.o b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/2.o new file mode 100644 index 0000000000000..607b47059e982 Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/2.o differ diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp new file mode 100644 index 0000000000000..4cf90f071ee8c --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp @@ -0,0 +1,5 @@ +#include "lib1.h" + +[[gnu::weak]] void lib1_internal() { + Foo{}.func(); +} diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h new file mode 100644 index 0000000000000..3b3cefbaeac17 --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h @@ -0,0 +1,3 @@ +struct Foo { + template void func() {} +}; diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp new file mode 100644 index 0000000000000..4cf90f071ee8c --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp @@ -0,0 +1,5 @@ +#include "lib1.h" + +[[gnu::weak]] void lib1_internal() { + Foo{}.func(); +} diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp 
b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp new file mode 100644 index 0000000000000..77f2cc4c8affe --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp @@ -0,0 +1,6 @@ +[[gnu::weak]] void lib1_internal(); + +int main() { + lib1_internal(); + __builtin_debugtrap(); +} diff --git a/llvm/tools/bugpoint/BugDriver.h b/llvm/tools/bugpoint/BugDriver.h index ca57405f9d770..71a5aa14bbb2e 100644 --- a/llvm/tools/bugpoint/BugDriver.h +++ b/llvm/tools/bugpoint/BugDriver.h @@ -16,6 +16,7 @@ #define LLVM_TOOLS_BUGPOINT_BUGDRIVER_H #include "llvm/IR/ValueMap.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -41,6 +42,10 @@ extern bool DisableSimplifyCFG; /// extern bool BugpointIsInterrupted; +/// Command line options used across files. +extern cl::list InputArgv; +extern cl::opt OutputPrefix; + class BugDriver { LLVMContext &Context; const char *ToolName; // argv[0] of bugpoint diff --git a/llvm/tools/bugpoint/ExecutionDriver.cpp b/llvm/tools/bugpoint/ExecutionDriver.cpp index 8c6b7fbe50c7c..96eeb35b4db70 100644 --- a/llvm/tools/bugpoint/ExecutionDriver.cpp +++ b/llvm/tools/bugpoint/ExecutionDriver.cpp @@ -13,7 +13,6 @@ #include "BugDriver.h" #include "ToolRunner.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Program.h" @@ -102,15 +101,13 @@ static cl::opt CustomExecCommand( // Anything specified after the --args option are taken as arguments to the // program being debugged. 
-namespace llvm { -cl::list InputArgv("args", cl::Positional, - cl::desc("..."), - cl::PositionalEatsArgs); - -cl::opt - OutputPrefix("output-prefix", cl::init("bugpoint"), - cl::desc("Prefix to use for outputs (default: 'bugpoint')")); -} // namespace llvm +cl::list llvm::InputArgv("args", cl::Positional, + cl::desc("..."), + cl::PositionalEatsArgs); + +cl::opt llvm::OutputPrefix( + "output-prefix", cl::init("bugpoint"), + cl::desc("Prefix to use for outputs (default: 'bugpoint')")); static cl::list ToolArgv("tool-args", cl::Positional, cl::desc("..."), diff --git a/llvm/tools/bugpoint/ExtractFunction.cpp b/llvm/tools/bugpoint/ExtractFunction.cpp index 3206589ff38f2..31cdd0d43f2fc 100644 --- a/llvm/tools/bugpoint/ExtractFunction.cpp +++ b/llvm/tools/bugpoint/ExtractFunction.cpp @@ -36,9 +36,6 @@ using namespace llvm; #define DEBUG_TYPE "bugpoint" bool llvm::DisableSimplifyCFG = false; -namespace llvm { -extern cl::opt OutputPrefix; -} // namespace llvm static cl::opt NoDCE("disable-dce", diff --git a/llvm/tools/bugpoint/Miscompilation.cpp b/llvm/tools/bugpoint/Miscompilation.cpp index a7f1643aecf15..dcad126d87865 100644 --- a/llvm/tools/bugpoint/Miscompilation.cpp +++ b/llvm/tools/bugpoint/Miscompilation.cpp @@ -28,11 +28,6 @@ using namespace llvm; -namespace llvm { -extern cl::opt OutputPrefix; -extern cl::list InputArgv; -} // end namespace llvm - static cl::opt DisableLoopExtraction( "disable-loop-extraction", cl::desc("Don't extract loops when searching for miscompilations"), diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp index bf2e8c0b4a910..191f87c08a0f6 100644 --- a/llvm/tools/bugpoint/OptimizerDriver.cpp +++ b/llvm/tools/bugpoint/OptimizerDriver.cpp @@ -34,10 +34,6 @@ using namespace llvm; #define DEBUG_TYPE "bugpoint" -namespace llvm { -extern cl::opt OutputPrefix; -} - static cl::opt OptCmd("opt-command", cl::init(""), cl::desc("Path to opt. 
(default: search path " diff --git a/llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp b/llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp index cffaf7c9543fb..20ae253513f05 100644 --- a/llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp +++ b/llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp @@ -40,6 +40,18 @@ TEST(NativeSessionTest, TestCreateFromExe) { ASSERT_THAT_ERROR(std::move(E), Succeeded()); } +TEST(NativeSessionTest, TestInvalidPdbMagicError) { + SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0); + llvm::sys::path::append(InputsDir, "SimpleTest.cpp"); + std::string CppPath{InputsDir}; + std::unique_ptr S; + + Error E = NativeSession::createFromPdbPath(CppPath, S); + const char *FormatErr = "The record is in an unexpected format. " + "The input file did not contain the pdb file magic."; + ASSERT_THAT_ERROR(std::move(E), FailedWithMessage(FormatErr)); +} + TEST(NativeSessionTest, TestSetLoadAddress) { std::unique_ptr S; Error E = pdb::loadDataForEXE(PDB_ReaderType::Native, getExePath(), S); diff --git a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn index 66dbf6152472a..9b524e2ef7cd5 100644 --- a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn @@ -29,6 +29,8 @@ static_library("Driver") { sources = [ "Action.cpp", "Compilation.cpp", + "CreateASTUnitFromArgs.cpp", + "CreateInvocationFromArgs.cpp", "Distro.cpp", "Driver.cpp", "Job.cpp", diff --git a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn index 4009cfc609f4a..cdf39d645bc52 100644 --- a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn @@ -28,7 +28,6 @@ static_library("Frontend") { "ChainedIncludesSource.cpp", "CompilerInstance.cpp", "CompilerInvocation.cpp", - "CreateInvocationFromCommandLine.cpp", "DependencyFile.cpp", "DependencyGraph.cpp", 
"DiagnosticRenderer.cpp", @@ -48,6 +47,7 @@ static_library("Frontend") { "SARIFDiagnosticPrinter.cpp", "SerializedDiagnosticPrinter.cpp", "SerializedDiagnosticReader.cpp", + "StandaloneDiagnostic.cpp", "TestModuleFileExtension.cpp", "TextDiagnostic.cpp", "TextDiagnosticBuffer.cpp", diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 09d2f1ed92554..82fe916645635 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -1167,7 +1167,6 @@ if (current_toolchain == default_toolchain) { "__locale_dir/locale_base_api.h", "__locale_dir/locale_base_api/bsd_locale_fallbacks.h", "__locale_dir/locale_base_api/ibm.h", - "__locale_dir/locale_base_api/musl.h", "__locale_dir/locale_base_api/openbsd.h", "__locale_dir/messages.h", "__locale_dir/money.h", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index ad72c0069237d..e54797e188a11 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -165,6 +165,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVVectorMaskDAGMutation.cpp", "RISCVVectorPeephole.cpp", "RISCVZacasABIFix.cpp", + "RISCVZilsdOptimizer.cpp", ] } diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index c1ade9ed8617c..cc7f09f71d028 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -23,6 +23,8 @@ MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(LLVM, llvm); MLIR_CAPI_EXPORTED MlirType mlirLLVMPointerTypeGet(MlirContext ctx, unsigned addressSpace); +MLIR_CAPI_EXPORTED MlirTypeID mlirLLVMPointerTypeGetTypeID(void); + /// Returns `true` if the type is an LLVM dialect pointer type. 
MLIR_CAPI_EXPORTED bool mlirTypeIsALLVMPointerType(MlirType type); @@ -58,6 +60,8 @@ MLIR_CAPI_EXPORTED MlirType mlirLLVMFunctionTypeGetReturnType(MlirType type); /// Returns `true` if the type is an LLVM dialect struct type. MLIR_CAPI_EXPORTED bool mlirTypeIsALLVMStructType(MlirType type); +MLIR_CAPI_EXPORTED MlirTypeID mlirLLVMStructTypeGetTypeID(void); + /// Returns `true` if the type is a literal (unnamed) LLVM struct type. MLIR_CAPI_EXPORTED bool mlirLLVMStructTypeIsLiteral(MlirType type); diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h index 05d2316711c8a..601fc1a594768 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -177,6 +177,10 @@ static constexpr StringLiteral getRoutineInfoAttrName() { return StringLiteral("acc.routine_info"); } +static constexpr StringLiteral getFromDefaultClauseAttrName() { + return StringLiteral("acc.from_default"); +} + static constexpr StringLiteral getVarNameAttrName() { return VarNameAttr::name; } diff --git a/mlir/lib/Bindings/Python/DialectLLVM.cpp b/mlir/lib/Bindings/Python/DialectLLVM.cpp index 870a713b8edcb..05681cecf82b3 100644 --- a/mlir/lib/Bindings/Python/DialectLLVM.cpp +++ b/mlir/lib/Bindings/Python/DialectLLVM.cpp @@ -31,8 +31,8 @@ static void populateDialectLLVMSubmodule(nanobind::module_ &m) { // StructType //===--------------------------------------------------------------------===// - auto llvmStructType = - mlir_type_subclass(m, "StructType", mlirTypeIsALLVMStructType); + auto llvmStructType = mlir_type_subclass( + m, "StructType", mlirTypeIsALLVMStructType, mlirLLVMStructTypeGetTypeID); llvmStructType .def_classmethod( @@ -137,7 +137,8 @@ static void populateDialectLLVMSubmodule(nanobind::module_ &m) { // PointerType //===--------------------------------------------------------------------===// - mlir_type_subclass(m, "PointerType", mlirTypeIsALLVMPointerType) + mlir_type_subclass(m, 
"PointerType", mlirTypeIsALLVMPointerType, + mlirLLVMPointerTypeGetTypeID) .def_classmethod( "get", [](const nb::object &cls, std::optional addressSpace, diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp index 6636f0ea73ec9..bf231767320a5 100644 --- a/mlir/lib/CAPI/Dialect/LLVM.cpp +++ b/mlir/lib/CAPI/Dialect/LLVM.cpp @@ -27,6 +27,10 @@ MlirType mlirLLVMPointerTypeGet(MlirContext ctx, unsigned addressSpace) { return wrap(LLVMPointerType::get(unwrap(ctx), addressSpace)); } +MlirTypeID mlirLLVMPointerTypeGetTypeID() { + return wrap(LLVM::LLVMPointerType::getTypeID()); +} + bool mlirTypeIsALLVMPointerType(MlirType type) { return isa(unwrap(type)); } @@ -73,6 +77,10 @@ bool mlirTypeIsALLVMStructType(MlirType type) { return isa(unwrap(type)); } +MlirTypeID mlirLLVMStructTypeGetTypeID() { + return wrap(LLVM::LLVMStructType::getTypeID()); +} + bool mlirLLVMStructTypeIsLiteral(MlirType type) { return !cast(unwrap(type)).isIdentified(); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index ce93d18f56d39..5dc4fa2b2d82f 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -667,6 +667,7 @@ LogicalResult LLVMStructType::verifyEntries(DataLayoutEntryListRef entries, static constexpr llvm::StringRef kSpirvPrefix = "spirv."; static constexpr llvm::StringRef kArmSVCount = "aarch64.svcount"; +static constexpr llvm::StringRef kAMDGCNNamedBarrier = "amdgcn.named.barrier"; bool LLVM::LLVMTargetExtType::hasProperty(Property prop) const { // See llvm/lib/IR/Type.cpp for reference. 
@@ -676,6 +677,9 @@ bool LLVM::LLVMTargetExtType::hasProperty(Property prop) const { properties |= (LLVMTargetExtType::HasZeroInit | LLVM::LLVMTargetExtType::CanBeGlobal); + if (getExtTypeName() == kAMDGCNNamedBarrier) + properties |= LLVMTargetExtType::CanBeGlobal; + return (properties & prop) == prop; } diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp index 91262bd76ca31..7d729619b3f21 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -570,6 +570,8 @@ Operation *ACCImplicitData::generateDataClauseOpForCandidate( newDataOp = acc::PresentOp::create(builder, loc, var, /*structured=*/true, /*implicit=*/true, accSupport.getVariableName(var)); + newDataOp->setAttr(acc::getFromDefaultClauseAttrName(), + builder.getUnitAttr()); } else { auto copyinOp = acc::CopyinOp::create(builder, loc, var, diff --git a/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir index cf09c33ca5197..06c1c3cadd4ba 100644 --- a/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir +++ b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir @@ -110,7 +110,7 @@ func.func @test_array_parallel_defaultpresent() { } // CHECK-LABEL: func.func @test_array_parallel_defaultpresent -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {implicit = true, name = ""} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {acc.from_default, implicit = true, name = ""} // CHECK: acc.delete accPtr(%[[PRESENT]] : memref<10xf32>) {dataClause = #acc, implicit = true, name = ""} // ----- diff --git a/mlir/test/Target/LLVMIR/target-ext-type.mlir b/mlir/test/Target/LLVMIR/target-ext-type.mlir index 6b2d2ea3d4c23..cee630163ca21 100644 --- a/mlir/test/Target/LLVMIR/target-ext-type.mlir +++ b/mlir/test/Target/LLVMIR/target-ext-type.mlir @@ -6,6 +6,12 @@ 
llvm.mlir.global external @global() {addr_space = 0 : i32} : !llvm.target<"spirv llvm.return %0 : !llvm.target<"spirv.DeviceEvent"> } +// CHECK: @amdgcn_named_barrier = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison +llvm.mlir.global internal @amdgcn_named_barrier() {addr_space = 3 : i32} : !llvm.target<"amdgcn.named.barrier", 0> { + %0 = llvm.mlir.poison : !llvm.target<"amdgcn.named.barrier", 0> + llvm.return %0 : !llvm.target<"amdgcn.named.barrier", 0> +} + // CHECK-LABEL: define target("spirv.Event") @func2() { // CHECK-NEXT: %1 = alloca target("spirv.Event"), align 8 // CHECK-NEXT: %2 = load target("spirv.Event"), ptr %1, align 8 diff --git a/mlir/test/python/dialects/llvm.py b/mlir/test/python/dialects/llvm.py index 8ea0fddee3f7c..305ed9aba940d 100644 --- a/mlir/test/python/dialects/llvm.py +++ b/mlir/test/python/dialects/llvm.py @@ -98,6 +98,9 @@ def testStructType(): assert opaque.opaque # CHECK: !llvm.struct<"opaque", opaque> + typ = Type.parse('!llvm.struct<"zoo", (i32, i64)>') + assert isinstance(typ, llvm.StructType) + # CHECK-LABEL: testSmoke @constructAndPrintInModule @@ -120,6 +123,9 @@ def testPointerType(): # CHECK: !llvm.ptr<1> print(ptr_with_addr) + typ = Type.parse("!llvm.ptr<1>") + assert isinstance(typ, llvm.PointerType) + # CHECK-LABEL: testConstant @constructAndPrintInModule