diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst index cc0c729aaacdc..b359d85ad0cdc 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst @@ -4,7 +4,7 @@ cert-mem57-cpp ============== The `cert-mem57-cpp` check is an alias, please see -`bugprone-default-operator-new-on-overaligned-type <../bugprone/default-operator-new-on-overaligned-type>`_ +`bugprone-default-operator-new-on-overaligned-type <../bugprone/default-operator-new-on-overaligned-type.html>`_ for more information. This check corresponds to the CERT C++ Coding Standard rule diff --git a/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst b/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst index ec9ef1c60913c..6c994a48d83de 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst @@ -14,3 +14,21 @@ should be generally avoided. // becomes static std::string Moo = (Twine("bark") + "bah").str(); + +A ``Twine`` does not own the memory of its contents, so it is not +recommended to use a ``Twine`` created from temporary strings or string literals. + +.. code-block:: c++ + + static Twine getModuleIdentifier(StringRef moduleName) { + return moduleName + "_module"; + } + void foo() { + Twine result = getModuleIdentifier(std::string{"abc"} + "def"); + // temporary std::string is destroyed here, result is dangling + } + +After applying these fix-it hints, the code will use ``std::string`` instead of +``Twine`` for local variables. However, ``Twine`` has many methods that +are incompatible with ``std::string``, so the user may need to adjust the code +manually after applying the fix-it hints. diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 48ef8be9fb782..6f099a7027a10 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -180,6 +180,8 @@ struct MissingFeatures { static bool atomicSyncScopeID() { return false; } static bool atomicTypes() { return false; } static bool atomicUseLibCall() { return false; } + static bool atomicMicrosoftVolatile() { return false; } + static bool atomicOpenMP() { return false; } // Global ctor handling static bool globalCtorLexOrder() { return false; } diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp index e0b2852f0e906..2425373ab2ef8 100644 --- a/clang/lib/AST/ByteCode/Program.cpp +++ b/clang/lib/AST/ByteCode/Program.cpp @@ -218,21 +218,42 @@ UnsignedOrNone Program::createGlobal(const ValueDecl *VD, const Expr *Init) { return std::nullopt; Global *NewGlobal = Globals[*Idx]; + // Note that this loop has one iteration where Redecl == VD. for (const Decl *Redecl : VD->redecls()) { - unsigned &PIdx = GlobalIndices[Redecl]; + + // If this redecl was registered as a dummy variable, it is now a proper + // global variable and points to the block we just created. + if (auto DummyIt = DummyVariables.find(Redecl); + DummyIt != DummyVariables.end()) { + assert(!Globals[DummyIt->second]->block()->hasPointers()); + Globals[DummyIt->second] = NewGlobal; + DummyVariables.erase(DummyIt); + } + // If the redeclaration hasn't been registered yet at all, we just set its + // global index to Idx.
If it has already been registered, it might have + pointers pointing to it and we need to transfer those pointers to the new + block. + auto [Iter, Inserted] = GlobalIndices.try_emplace(Redecl); + if (Inserted) { + GlobalIndices[Redecl] = *Idx; + continue; + } + if (Redecl != VD) { - if (Block *RedeclBlock = Globals[PIdx]->block(); + if (Block *RedeclBlock = Globals[Iter->second]->block(); RedeclBlock->isExtern()) { - Globals[PIdx] = NewGlobal; + // All pointers pointing to the previous extern decl now point to the // new decl. // A previous iteration might've already fixed up the pointers for this // global. if (RedeclBlock != NewGlobal->block()) RedeclBlock->movePointersTo(NewGlobal->block()); + + Globals[Iter->second] = NewGlobal; } } - PIdx = *Idx; + Iter->second = *Idx; } return *Idx; diff --git a/clang/lib/AST/ByteCode/Program.h b/clang/lib/AST/ByteCode/Program.h index 28fcc97f5339d..cc9127dc77860 100644 --- a/clang/lib/AST/ByteCode/Program.h +++ b/clang/lib/AST/ByteCode/Program.h @@ -205,7 +205,6 @@ class Program final { const Block *block() const { return &B; } private: - /// Required metadata - does not actually track pointers. Block B; }; diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 7db6e283ec0a5..cd4c1f0e5b769 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -27,6 +27,7 @@ class AtomicInfo { CharUnits atomicAlign; CharUnits valueAlign; TypeEvaluationKind evaluationKind = cir::TEK_Scalar; + bool useLibCall = true; LValue lvalue; mlir::Location loc; @@ -62,8 +63,8 @@ class AtomicInfo { assert(!cir::MissingFeatures::atomicInfo()); cgf.cgm.errorNYI(loc, "AtomicInfo: non-simple lvalue"); } - - assert(!cir::MissingFeatures::atomicUseLibCall()); + useLibCall = !ctx.getTargetInfo().hasBuiltinAtomic( + atomicSizeInBits, ctx.toBits(lvalue.getAlignment())); } QualType getValueType() const { return valueTy; } @@ -75,6 +76,8 @@ class AtomicInfo { assert(!cir::MissingFeatures::atomicInfoGetAtomicPointer()); return nullptr; } + bool shouldUseLibCall() const { return useLibCall; } + const LValue &getAtomicLValue() const { return lvalue; } Address getAtomicAddress() const { mlir::Type elemTy; if (lvalue.isSimple()) { @@ -96,6 +99,8 @@ bool emitMemSetZeroIfNecessary() const; + mlir::Value getScalarRValValueOrNull(RValue rvalue) const; + /// Cast the given pointer to an integer pointer suitable for atomic /// operations on the source. Address castToAtomicIntPointer(Address addr) const; @@ -105,6 +110,9 @@ /// copy the value across. Address convertToAtomicIntPointer(Address addr) const; + /// Converts an rvalue to an integer value. + mlir::Value convertRValueToInt(RValue rvalue, bool cmpxchg = false) const; + /// Copy an atomic r-value into atomic-layout memory. void emitCopyIntoMemory(RValue rvalue) const; @@ -195,6 +203,12 @@ Address AtomicInfo::createTempAlloca() const { return tempAlloca; } +mlir::Value AtomicInfo::getScalarRValValueOrNull(RValue rvalue) const { + if (rvalue.isScalar() && (!hasPadding() || !lvalue.isSimple())) + return rvalue.getValue(); + return nullptr; +} + Address AtomicInfo::castToAtomicIntPointer(Address addr) const { auto intTy = mlir::dyn_cast(addr.getElementType()); // Don't bother with int casts if the integer size is the same.
@@ -211,10 +225,38 @@ bool AtomicInfo::emitMemSetZeroIfNecessary() const { return false; cgf.cgm.errorNYI(loc, "AtomicInfo::emitMemSetZeroIfNecessary: emit memset zero"); return false; } +/// Return true if \param valueTy is a type that should be casted to integer +/// around the atomic memory operation. If \param cmpxchg is true, then the +/// cast of a floating point type is made as that instruction cannot have +/// floating point operands. TODO: Allow compare-and-exchange and FP - see +/// comment in CIRGenAtomicExpandPass.cpp. +static bool shouldCastToInt(mlir::Type valueTy, bool cmpxchg) { + if (cir::isAnyFloatingPointType(valueTy)) + return isa(valueTy) || cmpxchg; + return !isa(valueTy) && !isa(valueTy); +} + +mlir::Value AtomicInfo::convertRValueToInt(RValue rvalue, bool cmpxchg) const { + // If we've got a scalar value of the right size, try to avoid going + // through memory. Floats get casted if needed by AtomicExpandPass. + if (mlir::Value value = getScalarRValValueOrNull(rvalue)) { + if (!shouldCastToInt(value.getType(), cmpxchg)) + return cgf.emitToMemory(value, valueTy); + + cgf.cgm.errorNYI( + loc, "AtomicInfo::convertRValueToInt: cast scalar rvalue to int"); + return nullptr; + } + + cgf.cgm.errorNYI( + loc, "AtomicInfo::convertRValueToInt: cast non-scalar rvalue to int"); + return nullptr; +} + /// Copy an r-value into memory as part of storing to an atomic type. /// This needs to create a bit-pattern suitable for atomic operations. void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const { @@ -815,6 +857,79 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) { e->getExprLoc()); } +void CIRGenFunction::emitAtomicStore(RValue rvalue, LValue dest, bool isInit) { + bool isVolatile = dest.isVolatileQualified(); + auto order = cir::MemOrder::SequentiallyConsistent; + if (!dest.getType()->isAtomicType()) { + assert(!cir::MissingFeatures::atomicMicrosoftVolatile()); + } + return emitAtomicStore(rvalue, dest, order, isVolatile, isInit); +} + +/// Emit a store to an l-value of atomic type. +/// +/// Note that the r-value is expected to be an r-value of the atomic type; this +/// means that for aggregate r-values, it should include storage for any padding +/// that was necessary. +void CIRGenFunction::emitAtomicStore(RValue rvalue, LValue dest, + cir::MemOrder order, bool isVolatile, + bool isInit) { + // If this is an aggregate r-value, it should agree in type except + // maybe for address-space qualification. + mlir::Location loc = dest.getPointer().getLoc(); + assert(!rvalue.isAggregate() || + rvalue.getAggregateAddress().getElementType() == + dest.getAddress().getElementType()); + + AtomicInfo atomics(*this, dest, loc); + LValue lvalue = atomics.getAtomicLValue(); + + if (lvalue.isSimple()) { + // If this is an initialization, just put the value there normally. + if (isInit) { + atomics.emitCopyIntoMemory(rvalue); + return; + } + + // Check whether we should use a library call. + if (atomics.shouldUseLibCall()) { + assert(!cir::MissingFeatures::atomicUseLibCall()); + cgm.errorNYI(loc, "emitAtomicStore: atomic store with library call"); + return; + } + + // Okay, we're doing this natively. + mlir::Value valueToStore = atomics.convertRValueToInt(rvalue); + + // Do the atomic store.
+ Address addr = atomics.getAtomicAddress(); + if (mlir::Value value = atomics.getScalarRValValueOrNull(rvalue)) { + if (shouldCastToInt(value.getType(), /*CmpXchg=*/false)) { + addr = atomics.castToAtomicIntPointer(addr); + valueToStore = + builder.createIntCast(valueToStore, addr.getElementType()); + } + } + cir::StoreOp store = builder.createStore(loc, valueToStore, addr); + + // Initializations don't need to be atomic. + if (!isInit) { + assert(!cir::MissingFeatures::atomicOpenMP()); + store.setMemOrder(order); + } + + // Other decoration. + if (isVolatile) + store.setIsVolatile(true); + + assert(!cir::MissingFeatures::opLoadStoreTbaa()); + return; + } + + cgm.errorNYI(loc, "emitAtomicStore: non-simple atomic lvalue"); + assert(!cir::MissingFeatures::opLoadStoreAtomic()); +} + void CIRGenFunction::emitAtomicInit(Expr *init, LValue dest) { AtomicInfo atomics(*this, dest, getLoc(init->getSourceRange())); diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 4fb178df0e508..422fa1cf5ad2e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -311,7 +311,8 @@ static LValue emitGlobalVarDeclLValue(CIRGenFunction &cgf, const Expr *e, void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, bool isVolatile, QualType ty, - bool isInit, bool isNontemporal) { + LValueBaseInfo baseInfo, bool isInit, + bool isNontemporal) { assert(!cir::MissingFeatures::opLoadStoreThreadLocal()); if (const auto *clangVecTy = ty->getAs()) { @@ -333,7 +334,13 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, value = emitToMemory(value, ty); - assert(!cir::MissingFeatures::opLoadStoreAtomic()); + assert(!cir::MissingFeatures::opLoadStoreTbaa()); + LValue atomicLValue = LValue::makeAddr(addr, ty, baseInfo); + if (ty->isAtomicType() || + (!isInit && isLValueSuitableForInlineAtomic(atomicLValue))) { + emitAtomicStore(RValue::get(value), atomicLValue, isInit); + return; + } // Update the alloca with more info on initialization. 
assert(addr.getPointer() && "expected pointer to exist"); @@ -550,7 +557,8 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue, } emitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), isInit, /*isNontemporal=*/false); + lvalue.getType(), lvalue.getBaseInfo(), isInit, + /*isNontemporal=*/false); } mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index dece642eb13b6..1c52a78d72e33 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1271,6 +1271,9 @@ class CIRGenFunction : public CIRGenTypeCache { RValue emitAtomicExpr(AtomicExpr *e); void emitAtomicInit(Expr *init, LValue dest); + void emitAtomicStore(RValue rvalue, LValue dest, bool isInit); + void emitAtomicStore(RValue rvalue, LValue dest, cir::MemOrder order, + bool isVolatile, bool isInit); AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d, mlir::OpBuilder::InsertPoint ip = {}); @@ -1680,8 +1683,8 @@ class CIRGenFunction : public CIRGenTypeCache { bool isInit); void emitStoreOfScalar(mlir::Value value, Address addr, bool isVolatile, - clang::QualType ty, bool isInit = false, - bool isNontemporal = false); + clang::QualType ty, LValueBaseInfo baseInfo, + bool isInit = false, bool isNontemporal = false); void emitStoreOfScalar(mlir::Value value, LValue lvalue, bool isInit); /// Store the specified rvalue into the specified diff --git a/clang/test/AST/ByteCode/records.cpp b/clang/test/AST/ByteCode/records.cpp index 83f32c97c50c7..4799ebe25dde1 100644 --- a/clang/test/AST/ByteCode/records.cpp +++ b/clang/test/AST/ByteCode/records.cpp @@ -1882,3 +1882,14 @@ namespace MethodWillHaveBody { } int n = f(0); // both-note {{instantiation of}} } + +namespace StaticRedecl { + struct T { + static T tt; + constexpr T() : p(&tt) {} + T *p; + }; + T T::tt; + constexpr T t; + static_assert(t.p == &T::tt, ""); +} diff --git a/clang/test/AST/ast-dump-arm-attr.c b/clang/test/AST/ast-dump-arm-attr.c index 78f557d4eb0b1..d26a77d067e97 100644 --- a/clang/test/AST/ast-dump-arm-attr.c +++ b/clang/test/AST/ast-dump-arm-attr.c @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -triple arm-apple-darwin -ast-dump -ast-dump-filter Test %s \ // RUN: | FileCheck --strict-whitespace %s // -// RUN: %clang_cc1 -triple armv8m.base-none-eabi -mcmse -ast-dump -ast-dump-filter Test %s \ +// RUN: %clang_cc1 -triple thumbv8m.base-none-eabi -mcmse -ast-dump -ast-dump-filter Test %s \ // RUN: | FileCheck --strict-whitespace %s --check-prefix=CHECK-CMSE // // Tests with serialization: @@ -11,8 +11,8 @@ // RUN: | sed -e "s/ //" -e "s/ imported//" \ // RUN: | FileCheck --strict-whitespace %s // -// RUN: %clang_cc1 -triple armv8m.base-none-eabi -mcmse -emit-pch -o %t %s -// RUN: %clang_cc1 -x c -triple armv8m.base-none-eabi -mcmse -include-pch %t -ast-dump-all -ast-dump-filter Test /dev/null \ +// RUN: %clang_cc1 -triple thumbv8m.base-none-eabi -mcmse -emit-pch -o %t %s +// RUN: %clang_cc1 -x c -triple thumbv8m.base-none-eabi -mcmse -include-pch %t -ast-dump-all -ast-dump-filter Test /dev/null \ // RUN: | sed -e "s/ //" -e "s/ imported//" \ // RUN: | FileCheck --strict-whitespace %s diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c index 65799881a0cbe..d5bea8446d730 100644 --- a/clang/test/CIR/CodeGen/atomic.c +++ b/clang/test/CIR/CodeGen/atomic.c @@ -46,6 +46,32 @@ void f2(void) { // OGCG-NEXT: store i32 42, ptr %[[SLOT]], align 4 // OGCG: } 
+void f3(_Atomic(int) *p) { + *p = 42; +} + +// CIR-LABEL: @f3 +// CIR: cir.store align(4) atomic(seq_cst) %{{.+}}, %{{.+}} : !s32i, !cir.ptr + +// LLVM-LABEL: @f3 +// LLVM: store atomic i32 42, ptr %{{.+}} seq_cst, align 4 + +// OGCG-LABEL: @f3 +// OGCG: store atomic i32 42, ptr %{{.+}} seq_cst, align 4 + +void f4(_Atomic(float) *p) { + *p = 3.14; +} + +// CIR-LABEL: @f4 +// CIR: cir.store align(4) atomic(seq_cst) %{{.+}}, %{{.+}} : !cir.float, !cir.ptr + +// LLVM-LABEL: @f4 +// LLVM: store atomic float 0x40091EB860000000, ptr %{{.+}} seq_cst, align 4 + +// OGCG-LABEL: @f4 +// OGCG: store atomic float 0x40091EB860000000, ptr %{{.+}} seq_cst, align 4 + void load(int *ptr) { int x; __atomic_load(ptr, &x, __ATOMIC_RELAXED); diff --git a/clang/test/CodeGen/arm-acle-coproc.c b/clang/test/CodeGen/arm-acle-coproc.c index 5acb9f65413a0..000fff632f0b7 100644 --- a/clang/test/CodeGen/arm-acle-coproc.c +++ b/clang/test/CodeGen/arm-acle-coproc.c @@ -4,10 +4,10 @@ // RUN: %clang_cc1 -triple armv5te %s -E -dD -o - | FileCheck --check-prefix=CHECK-V5-TE %s // RUN: %clang_cc1 -triple armv5tej %s -E -dD -o - | FileCheck --check-prefix=CHECK-V5-TE %s // RUN: %clang_cc1 -triple armv6 %s -E -dD -o - | FileCheck --check-prefix=CHECK-V6 %s -// RUN: %clang_cc1 -triple armv6m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V6M %s +// RUN: %clang_cc1 -triple thumbv6m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V6M %s // RUN: %clang_cc1 -triple armv7a %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s // RUN: %clang_cc1 -triple armv7r %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s -// RUN: %clang_cc1 -triple armv7m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s +// RUN: %clang_cc1 -triple thumbv7m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s // RUN: %clang_cc1 -triple armv8a %s -E -dD -o - | FileCheck --check-prefix=CHECK-V8 %s // RUN: %clang_cc1 -triple armv8r %s -E -dD -o - | FileCheck --check-prefix=CHECK-V8 %s // RUN: %clang_cc1 -triple armv8.1a %s -E -dD -o - | FileCheck --check-prefix=CHECK-V8 %s diff --git a/clang/test/CodeGen/pr45476.cpp b/clang/test/CodeGen/pr45476.cpp index c95f7fb8cd9c3..3a67904a8e568 100644 --- a/clang/test/CodeGen/pr45476.cpp +++ b/clang/test/CodeGen/pr45476.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple armv6m-eabi -emit-llvm %s -o - | FileCheck -check-prefix=LIBCALL %s +// RUN: %clang_cc1 -triple thumbv6m-eabi -emit-llvm %s -o - | FileCheck -check-prefix=LIBCALL %s // RUN: %clang_cc1 -triple armv8-eabi -emit-llvm %s -o - | FileCheck -check-prefix=NATIVE %s // PR45476 diff --git a/clang/test/Sema/builtins-arm-exclusive-124.c b/clang/test/Sema/builtins-arm-exclusive-124.c index b35ac181f0887..93540879a01ba 100644 --- a/clang/test/Sema/builtins-arm-exclusive-124.c +++ b/clang/test/Sema/builtins-arm-exclusive-124.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple armv7m -fsyntax-only -verify %s -// RUN: %clang_cc1 -triple armv8m.main -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple thumbv7m -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple thumbv8m.main -fsyntax-only -verify %s // RUN: %clang_cc1 -triple armv8.1m.main -fsyntax-only -verify %s // All these architecture versions provide 1-, 2- or 4-byte exclusive accesses, diff --git a/clang/test/Sema/builtins-arm-exclusive-none.c b/clang/test/Sema/builtins-arm-exclusive-none.c index 2ef910dd99aaf..25a71e18935a6 100644 --- a/clang/test/Sema/builtins-arm-exclusive-none.c +++ b/clang/test/Sema/builtins-arm-exclusive-none.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple armv6m -fsyntax-only -verify %s +// RUN: 
%clang_cc1 -triple thumbv6m -fsyntax-only -verify %s // Armv6-M does not support exclusive loads/stores at all, so all uses of // __builtin_arm_ldrex[d] and __builtin_arm_strex[d] is forbidden. diff --git a/clang/test/SemaCXX/dllexport.cpp b/clang/test/SemaCXX/dllexport.cpp index f503e2fc311d1..169af5cacc6c7 100644 --- a/clang/test/SemaCXX/dllexport.cpp +++ b/clang/test/SemaCXX/dllexport.cpp @@ -1,13 +1,13 @@ -// RUN: %clang_cc1 -triple i686-win32 -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DMS %s -// RUN: %clang_cc1 -triple x86_64-win32 -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DMS %s -// RUN: %clang_cc1 -triple i686-mingw32 -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple x86_64-mingw32 -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple i686-pc-cygwin -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple i686-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DWI %s -// RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI %s -// RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify -std=c++11 -Wunsupported-dll-base-class-template -DPS %s -// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fdeclspec -verify -std=c++1y -Wunsupported-dll-base-class-template -DPS %s +// RUN: %clang_cc1 -triple i686-win32 -fsyntax-only -fms-extensions -verify=expected,ms,non-gnu,ms-ps -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-win32 -fsyntax-only -fms-extensions -verify=expected,ms,non-gnu,ms-ps -std=c++1y -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple i686-mingw32 -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu -std=c++1y -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-mingw32 -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple i686-pc-cygwin -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu -std=c++1y -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-pc-cygwin -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple i686-windows-itanium -fsyntax-only -fms-extensions -verify=expected,non-ms,non-gnu,win-gnu -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify=expected,non-ms,non-gnu,win-gnu -std=c++1y -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify=expected,non-ms,non-gnu,ms-ps -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fdeclspec -verify=expected,non-ms,non-gnu,ms-ps -std=c++1y -Wunsupported-dll-base-class-template %s // Helper structs to make templates more expressive. 
struct ImplicitInst_Exported {}; @@ -75,9 +75,7 @@ __declspec(dllexport) extern int GlobalRedecl4; // expected-warning{{redeclarati // External linkage is required. __declspec(dllexport) static int StaticGlobal; // expected-error{{'StaticGlobal' must have external linkage when declared 'dllexport'}} __declspec(dllexport) Internal InternalTypeGlobal; // expected-error{{'InternalTypeGlobal' must have external linkage when declared 'dllexport'}} -#ifndef MS -namespace { __declspec(dllexport) int InternalGlobal; } // expected-error{{'(anonymous namespace)::InternalGlobal' must have external linkage when declared 'dllexport'}} -#endif +namespace { __declspec(dllexport) int InternalGlobal; } // non-ms-error{{'(anonymous namespace)::InternalGlobal' must have external linkage when declared 'dllexport'}} namespace ns { __declspec(dllexport) int ExternalGlobal; } __declspec(dllexport) auto InternalAutoTypeGlobal = Internal(); // expected-error{{'InternalAutoTypeGlobal' must have external linkage when declared 'dllexport'}} @@ -132,9 +130,7 @@ template __declspec(dllexport) extern int VarTmplRedecl3; // expecte // External linkage is required. template __declspec(dllexport) static int StaticVarTmpl; // expected-error{{'StaticVarTmpl' must have external linkage when declared 'dllexport'}} template __declspec(dllexport) Internal InternalTypeVarTmpl; // expected-error{{'InternalTypeVarTmpl' must have external linkage when declared 'dllexport'}} -#ifndef MS -namespace { template __declspec(dllexport) int InternalVarTmpl; } // expected-error{{'(anonymous namespace)::InternalVarTmpl' must have external linkage when declared 'dllexport'}} -#endif +namespace { template __declspec(dllexport) int InternalVarTmpl; } // non-ms-error{{'(anonymous namespace)::InternalVarTmpl' must have external linkage when declared 'dllexport'}} namespace ns { template __declspec(dllexport) int ExternalVarTmpl = 1; } template __declspec(dllexport) auto InternalAutoTypeVarTmpl = Internal(); // expected-error{{'InternalAutoTypeVarTmpl' must have external linkage when declared 'dllexport'}} @@ -355,11 +351,8 @@ class __declspec(dllexport) ClassDecl; class __declspec(dllexport) ClassDef {}; -#if defined(MS) || defined (WI) || defined(PS) -// expected-warning@+3{{'dllexport' attribute ignored}} -#endif template struct PartiallySpecializedClassTemplate {}; -template struct __declspec(dllexport) PartiallySpecializedClassTemplate { void f() {} }; +template struct __declspec(dllexport) PartiallySpecializedClassTemplate { void f() {} }; // non-gnu-warning {{'dllexport' attribute ignored}} template struct ExpliciallySpecializedClassTemplate {}; template <> struct __declspec(dllexport) ExpliciallySpecializedClassTemplate { void f() {} }; @@ -373,16 +366,11 @@ ImplicitlyInstantiatedExportedTemplate implicitlyInstantiatedExp // Don't instantiate class members of templates with explicit instantiation declarations, even if they are exported. 
struct IncompleteType2; -#if defined(MS) || defined (WI) || defined(PS) -// expected-note@+2{{attribute is here}} -#endif -template struct __declspec(dllexport) ExportedTemplateWithExplicitInstantiationDecl { + +template struct __declspec(dllexport) ExportedTemplateWithExplicitInstantiationDecl { // non-gnu-note {{attribute is here}} int f() { return sizeof(T); } // no-error }; -#if defined(MS) || defined (WI) || defined(PS) -// expected-warning@+2{{explicit instantiation declaration should not be 'dllexport'}} -#endif -extern template struct ExportedTemplateWithExplicitInstantiationDecl; +extern template struct ExportedTemplateWithExplicitInstantiationDecl; // non-gnu-warning {{explicit instantiation declaration should not be 'dllexport'}} // Instantiate class members for explicitly instantiated exported templates. struct IncompleteType3; // expected-note{{forward declaration of 'IncompleteType3'}} @@ -392,16 +380,9 @@ template struct __declspec(dllexport) ExplicitlyInstantiatedExporte template struct ExplicitlyInstantiatedExportedTemplate; // expected-note{{in instantiation of member function 'ExplicitlyInstantiatedExportedTemplate::f' requested here}} // In MS mode, instantiate members of class templates that are base classes of exported classes. -#if defined(MS) || defined(PS) - // expected-note@+3{{forward declaration of 'IncompleteType4'}} - // expected-note@+3{{in instantiation of member function 'BaseClassTemplateOfExportedClass::f' requested here}} -#endif -struct IncompleteType4; -template struct BaseClassTemplateOfExportedClass { -#if defined(MS) || defined(PS) - // expected-error@+2{{invalid application of 'sizeof' to an incomplete type 'IncompleteType4'}} -#endif - int f() { return sizeof(T); }; +struct IncompleteType4; // ms-ps-note {{forward declaration of 'IncompleteType4'}} +template struct BaseClassTemplateOfExportedClass { // ms-ps-note {{in instantiation of member function 'BaseClassTemplateOfExportedClass::f' requested here}} + int f() { return sizeof(T); }; // ms-ps-error {{invalid application of 'sizeof' to an incomplete type 'IncompleteType4'}} }; struct __declspec(dllexport) ExportedBaseClass : public BaseClassTemplateOfExportedClass {}; @@ -414,17 +395,11 @@ struct __declspec(dllexport) ExportedBaseClass2 : public ExportedBaseClassTempla // Warn about explicit instantiation declarations of dllexport classes. 
template struct ExplicitInstantiationDeclTemplate {}; -#if defined(MS) || defined (WI) || defined(PS) -// expected-warning@+2{{explicit instantiation declaration should not be 'dllexport'}} expected-note@+2{{attribute is here}} -#endif -extern template struct __declspec(dllexport) ExplicitInstantiationDeclTemplate; +extern template struct __declspec(dllexport) ExplicitInstantiationDeclTemplate; // non-gnu-warning {{explicit instantiation declaration should not be 'dllexport'}} \ + non-gnu-note {{attribute is here}} -template struct __declspec(dllexport) ExplicitInstantiationDeclExportedTemplate {}; -#if defined(MS) || defined (WI) || defined(PS) -// expected-note@-2{{attribute is here}} -// expected-warning@+2{{explicit instantiation declaration should not be 'dllexport'}} -#endif -extern template struct ExplicitInstantiationDeclExportedTemplate; +template struct __declspec(dllexport) ExplicitInstantiationDeclExportedTemplate {}; // non-gnu-note {{attribute is here}} +extern template struct ExplicitInstantiationDeclExportedTemplate; // non-gnu-warning {{explicit instantiation declaration should not be 'dllexport'}} namespace { struct InternalLinkageType {}; } struct __declspec(dllexport) PR23308 { @@ -440,35 +415,23 @@ class __declspec(dllexport) ExportedClass {}; class __declspec(dllimport) ImportedClass {}; template class ClassTemplate {}; -#if not defined(MS) && not defined(PS) -// expected-error@+2{{'ExportedClassTemplate' must have external linkage when declared 'dllexport'}} -#endif -template class __declspec(dllexport) ExportedClassTemplate {}; +template class __declspec(dllexport) ExportedClassTemplate {}; // win-gnu-error {{'ExportedClassTemplate' must have external linkage when declared 'dllexport'}} template class __declspec(dllimport) ImportedClassTemplate {}; template struct ExplicitlySpecializedTemplate { void func() {} }; -#if defined(MS) || defined(PS) -// expected-note@+2{{class template 'ExplicitlySpecializedTemplate' was explicitly specialized here}} -#endif -template <> struct ExplicitlySpecializedTemplate { void func() {} }; +template <> struct ExplicitlySpecializedTemplate { void func() {} }; // ms-ps-note {{class template 'ExplicitlySpecializedTemplate' was explicitly specialized here}} template struct ExplicitlyExportSpecializedTemplate { void func() {} }; template <> struct __declspec(dllexport) ExplicitlyExportSpecializedTemplate { void func() {} }; template struct ExplicitlyImportSpecializedTemplate { void func() {} }; template <> struct __declspec(dllimport) ExplicitlyImportSpecializedTemplate { void func() {} }; template struct ExplicitlyInstantiatedTemplate { void func() {} }; -#if defined(MS) || defined(PS) -// expected-note@+2{{class template 'ExplicitlyInstantiatedTemplate' was instantiated here}} -#endif -template struct ExplicitlyInstantiatedTemplate; +template struct ExplicitlyInstantiatedTemplate; // ms-ps-note {{class template 'ExplicitlyInstantiatedTemplate' was instantiated here}} template struct ExplicitlyExportInstantiatedTemplate { void func() {} }; template struct __declspec(dllexport) ExplicitlyExportInstantiatedTemplate; template struct ExplicitlyExportDeclaredInstantiatedTemplate { void func() {} }; extern template struct ExplicitlyExportDeclaredInstantiatedTemplate; -#if not defined(MS) && not defined (WI) && not defined(PS) -// expected-warning@+2{{'dllexport' attribute ignored on explicit instantiation definition}} -#endif -template struct __declspec(dllexport) ExplicitlyExportDeclaredInstantiatedTemplate; +template struct 
__declspec(dllexport) ExplicitlyExportDeclaredInstantiatedTemplate; // gnu-warning {{'dllexport' attribute ignored on explicit instantiation definition}} template struct ExplicitlyImportInstantiatedTemplate { void func() {} }; template struct __declspec(dllimport) ExplicitlyImportInstantiatedTemplate; @@ -496,11 +459,8 @@ class __declspec(dllexport) DerivedFromTemplateB : public ClassTemplate {} // The second derived class doesn't change anything, the attribute that was propagated first wins. class __declspec(dllimport) DerivedFromTemplateB2 : public ClassTemplate {}; -#if defined(MS) || defined(PS) -// expected-warning@+3{{propagating dll attribute to explicitly specialized base class template without dll attribute is not supported}} -// expected-note@+2{{attribute is here}} -#endif -struct __declspec(dllexport) DerivedFromExplicitlySpecializedTemplate : public ExplicitlySpecializedTemplate {}; +struct __declspec(dllexport) DerivedFromExplicitlySpecializedTemplate : public ExplicitlySpecializedTemplate {}; // ms-ps-warning {{propagating dll attribute to explicitly specialized base class template without dll attribute is not supported}} \ + ms-ps-note {{attribute is here}} // Base class alredy specialized with export attribute. struct __declspec(dllexport) DerivedFromExplicitlyExportSpecializedTemplate : public ExplicitlyExportSpecializedTemplate {}; @@ -508,11 +468,8 @@ struct __declspec(dllexport) DerivedFromExplicitlyExportSpecializedTemplate : pu // Base class already specialized with import attribute. struct __declspec(dllexport) DerivedFromExplicitlyImportSpecializedTemplate : public ExplicitlyImportSpecializedTemplate {}; -#if defined(MS) || defined(PS) -// expected-warning@+3{{propagating dll attribute to already instantiated base class template without dll attribute is not supported}} -// expected-note@+2{{attribute is here}} -#endif -struct __declspec(dllexport) DerivedFromExplicitlyInstantiatedTemplate : public ExplicitlyInstantiatedTemplate {}; +struct __declspec(dllexport) DerivedFromExplicitlyInstantiatedTemplate : public ExplicitlyInstantiatedTemplate {}; // ms-ps-warning {{propagating dll attribute to already instantiated base class template without dll attribute is not supported}} \ + ms-ps-note {{attribute is here}} // Base class already instantiated with export attribute. struct __declspec(dllexport) DerivedFromExplicitlyExportInstantiatedTemplate : public ExplicitlyExportInstantiatedTemplate {}; @@ -528,10 +485,7 @@ void func() { // MSVC allows deriving from exported template classes in local contexts. 
class LocalDerivedFromExportedClass : public ExportedClass {}; class LocalDerivedFromExportedTemplate : public ExportedClassTemplate {}; -#if not defined(MS) && not defined (PS) - // expected-note@+2{{in instantiation of template class 'ExportedClassTemplate' requested here}} -#endif - class LocalCRTP : public ExportedClassTemplate {}; + class LocalCRTP : public ExportedClassTemplate {}; // win-gnu-note {{in instantiation of template class 'ExportedClassTemplate' requested here}} } //===----------------------------------------------------------------------===// @@ -778,46 +732,40 @@ __declspec(dllexport) void MemberRedecl::staticInlineDecl() {} // expect __declspec(dllexport) int MemberRedecl::StaticField = 1; // expected-error{{redeclaration of 'MemberRedecl::StaticField' cannot add 'dllexport' attribute}} __declspec(dllexport) const int MemberRedecl::StaticConstField = 1; // expected-error{{redeclaration of 'MemberRedecl::StaticConstField' cannot add 'dllexport' attribute}} -#ifdef MS -// expected-warning@+4{{attribute declaration must precede definition}} -#else -// expected-error@+2{{redeclaration of 'MemberRedecl::ConstexprField' cannot add 'dllexport' attribute}} -#endif -__declspec(dllexport) constexpr int MemberRedecl::ConstexprField; -#ifdef MS +__declspec(dllexport) constexpr int MemberRedecl::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \ + non-ms-error {{redeclaration of 'MemberRedecl::ConstexprField' cannot add 'dllexport' attribute}} + struct __declspec(dllexport) ClassWithMultipleDefaultCtors { - ClassWithMultipleDefaultCtors(int = 40) {} // expected-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} - ClassWithMultipleDefaultCtors(int = 30, ...) {} // expected-note{{declared here}} + ClassWithMultipleDefaultCtors(int = 40) {} // ms-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} + ClassWithMultipleDefaultCtors(int = 30, ...) {} // ms-note{{declared here}} }; template struct ClassTemplateWithMultipleDefaultCtors { - __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 40) {} // expected-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} - __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 30, ...) {} // expected-note{{declared here}} + __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 40) {} // ms-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} + __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 30, ...) 
{} // ms-note{{declared here}} }; template struct HasDefaults { - HasDefaults(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}} + HasDefaults(int x = sizeof(T)) {} // ms-error {{invalid application of 'sizeof'}} }; template struct __declspec(dllexport) HasDefaults; template struct -__declspec(dllexport) // expected-note {{in instantiation of default function argument expression for 'HasDefaults' required here}} -HasDefaults; // expected-note {{in instantiation of member function 'HasDefaults::HasDefaults' requested here}} +__declspec(dllexport) // ms-note {{in instantiation of default function argument expression for 'HasDefaults' required here}} +HasDefaults; // ms-note {{in instantiation of member function 'HasDefaults::HasDefaults' requested here}} template struct HasDefaults2 { - __declspec(dllexport) // expected-note {{in instantiation of default function argument expression for 'HasDefaults2' required here}} - HasDefaults2(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}} + __declspec(dllexport) // ms-note {{in instantiation of default function argument expression for 'HasDefaults2' required here}} + HasDefaults2(int x = sizeof(T)) {} // ms-error {{invalid application of 'sizeof'}} }; -template struct HasDefaults2; // expected-note {{in instantiation of member function 'HasDefaults2::HasDefaults2' requested here}} +template struct HasDefaults2; // ms-note {{in instantiation of member function 'HasDefaults2::HasDefaults2' requested here}} -template struct __declspec(dllexport) HasDefaults3 { // expected-note{{in instantiation of default function argument expression for 'HasDefaults3' required here}} - HasDefaults3(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}} +template struct __declspec(dllexport) HasDefaults3 { // ms-note{{in instantiation of default function argument expression for 'HasDefaults3' required here}} + HasDefaults3(int x = sizeof(T)) {} // ms-error {{invalid application of 'sizeof'}} }; template <> HasDefaults3::HasDefaults3(int) {}; -#endif - //===----------------------------------------------------------------------===// // Class member templates //===----------------------------------------------------------------------===// @@ -887,12 +835,8 @@ template __declspec(dllexport) void MemTmplRedecl::staticInli template __declspec(dllexport) int MemTmplRedecl::StaticField = 1; // expected-error{{redeclaration of 'MemTmplRedecl::StaticField' cannot add 'dllexport' attribute}} template __declspec(dllexport) const int MemTmplRedecl::StaticConstField = 1; // expected-error{{redeclaration of 'MemTmplRedecl::StaticConstField' cannot add 'dllexport' attribute}} -#ifdef MS -// expected-warning@+4{{attribute declaration must precede definition}} -#else -// expected-error@+2{{redeclaration of 'MemTmplRedecl::ConstexprField' cannot add 'dllexport' attribute}} -#endif -template __declspec(dllexport) constexpr int MemTmplRedecl::ConstexprField; +template __declspec(dllexport) constexpr int MemTmplRedecl::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \ + non-ms-error {{redeclaration of 'MemTmplRedecl::ConstexprField' cannot add 'dllexport' attribute}} #endif // __has_feature(cxx_variable_templates) @@ -1097,20 +1041,13 @@ template __declspec(dllexport) void CTMR::staticInlineDecl template __declspec(dllexport) int CTMR::StaticField = 1; // expected-error{{redeclaration of 'CTMR::StaticField' cannot add 'dllexport' attribute}} template __declspec(dllexport) const int 
CTMR::StaticConstField = 1; // expected-error{{redeclaration of 'CTMR::StaticConstField' cannot add 'dllexport' attribute}} -#ifdef MS -// expected-warning@+4{{attribute declaration must precede definition}} -#else -// expected-error@+2{{redeclaration of 'CTMR::ConstexprField' cannot add 'dllexport' attribute}} -#endif -template __declspec(dllexport) constexpr int CTMR::ConstexprField; +template __declspec(dllexport) constexpr int CTMR::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \ + non-ms-error {{redeclaration of 'CTMR::ConstexprField' cannot add 'dllexport' attribute}} // MSVC exports explicit specialization of exported class template member // function, and errors on such definitions. MinGW does not treat them as // dllexport. -#if !defined(GNU) -// expected-error@+2{{attribute 'dllexport' cannot be applied to a deleted function}} -#endif -template <> void ExportClassTmplMembers::normalDecl() = delete; +template <> void ExportClassTmplMembers::normalDecl() = delete; // non-gnu-error {{attribute 'dllexport' cannot be applied to a deleted function}} //===----------------------------------------------------------------------===// @@ -1183,12 +1120,8 @@ template template __declspec(dllexport) void CTMT #if __has_feature(cxx_variable_templates) template template __declspec(dllexport) int CTMTR::StaticField = 1; // expected-error{{redeclaration of 'CTMTR::StaticField' cannot add 'dllexport' attribute}} template template __declspec(dllexport) const int CTMTR::StaticConstField = 1; // expected-error{{redeclaration of 'CTMTR::StaticConstField' cannot add 'dllexport' attribute}} -#ifdef MS -// expected-warning@+4{{attribute declaration must precede definition}} -#else -// expected-error@+2{{redeclaration of 'CTMTR::ConstexprField' cannot add 'dllexport' attribute}} -#endif -template template __declspec(dllexport) constexpr int CTMTR::ConstexprField; +template template __declspec(dllexport) constexpr int CTMTR::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \ + non-ms-error {{redeclaration of 'CTMTR::ConstexprField' cannot add 'dllexport' attribute}} #endif // __has_feature(cxx_variable_templates) // FIXME: Precedence rules seem to be different for classes. @@ -1197,7 +1130,4 @@ template template __declspec(dllexport) constexpr int CT // Lambdas //===----------------------------------------------------------------------===// // The MS ABI doesn't provide a stable mangling for lambdas, so they can't be imported or exported. -#if defined(MS) || defined (WI) || defined(PS) -// expected-error@+2{{lambda cannot be declared 'dllexport'}} -#endif -auto Lambda = []() __declspec(dllexport) -> bool { return true; }; +auto Lambda = []() __declspec(dllexport) -> bool { return true; }; // non-gnu-error {{lambda cannot be declared 'dllexport'}} diff --git a/libc/shared/math.h b/libc/shared/math.h index bd6aee73c3933..282dd6243d6a7 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -51,6 +51,7 @@ #include "math/exp2f.h" #include "math/exp2f16.h" #include "math/exp2m1f.h" +#include "math/exp2m1f16.h" #include "math/expf.h" #include "math/expf16.h" #include "math/frexpf.h" diff --git a/libc/shared/math/exp2m1f16.h b/libc/shared/math/exp2m1f16.h new file mode 100644 index 0000000000000..96a404708be18 --- /dev/null +++ b/libc/shared/math/exp2m1f16.h @@ -0,0 +1,29 @@ +//===-- Shared exp2m1f16 function -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_EXP2M1F16_H +#define LLVM_LIBC_SHARED_MATH_EXP2M1F16_H + +#include "include/llvm-libc-macros/float16-macros.h" +#include "shared/libc_common.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/math/exp2m1f16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::exp2m1f16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_EXP2M1F16_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 620900028d424..ddc0159b10ce4 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -769,6 +769,24 @@ add_header_library( libc.src.__support.macros.properties.cpu_features ) +add_header_library( + exp2m1f16 + HDRS + exp2m1f16.h + DEPENDS + .expxf16_utils + libc.src.__support.common + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.cpu_features +) + add_header_library( exp10 HDRS diff --git a/libc/src/__support/math/exp2m1f16.h b/libc/src/__support/math/exp2m1f16.h new file mode 100644 index 0000000000000..0424af4aa953d --- /dev/null +++ b/libc/src/__support/math/exp2m1f16.h @@ -0,0 +1,180 @@ +//===-- Implementation header for exp2m1f16 ----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" +#include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/expxf16_utils.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float16 exp2m1f16(float16 x) { +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr fputil::ExceptValues EXP2M1F16_EXCEPTS_LO = {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ) + {0x0b3dU, 0x0904U, 1U, 0U, 1U}, + // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ) + {0x0d3fU, 0x0b45U, 1U, 0U, 1U}, + // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ) + {0x118cU, 0x0fb1U, 1U, 0U, 0U}, + // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ) + {0x21bcU, 0x1ffaU, 1U, 0U, 1U}, + // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ) + {0x9718U, 0x94eaU, 0U, 1U, 0U}, + // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ) + {0x973fU, 0x9505U, 0U, 1U, 0U}, + }}; + +#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT + constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6; +#else + constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7; +#endif + + constexpr fputil::ExceptValues + EXP2M1F16_EXCEPTS_HI = {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ) + {0x3396U, 0x31b7U, 1U, 0U, 0U}, +#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT + // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ) + {0x34baU, 0x3345U, 1U, 0U, 0U}, +#endif + // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ) + {0x36b6U, 0x3566U, 1U, 0U, 0U}, +#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT + // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ) + {0x37b7U, 0x3659U, 1U, 0U, 1U}, +#endif + // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ) + {0xb201U, 0xafcdU, 0U, 1U, 1U}, + // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ) + {0xb3ccU, 0xb0f9U, 0U, 1U, 0U}, + // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ) + {0xb8a5U, 0xb54cU, 0U, 1U, 1U}, +#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT + // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ) + {0xba8dU, 0xb6edU, 0U, 1U, 1U}, +#endif + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + using namespace math::expxf16_internal; + using FPBits = fputil::FPBits; + FPBits x_bits(x); + + uint16_t x_u = x_bits.uintval(); + uint16_t x_abs = x_u & 0x7fffU; + + // When |x| <= 2^(-3), or |x| >= 11, or x is NaN. + if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) { + // exp2m1(NaN) = NaN + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // When x >= 16. 
+ if (x_u >= 0x4c00 && x_bits.is_pos()) { + // exp2m1(+inf) = +inf + if (x_bits.is_inf()) + return FPBits::inf().get_val(); + + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_UPWARD: + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); + return FPBits::inf().get_val(); + default: + return FPBits::max_normal().get_val(); + } + } + + // When x < -11. + if (x_u > 0xc980U) { + // exp2m1(-inf) = -1 + if (x_bits.is_inf()) + return FPBits::one(Sign::NEG).get_val(); + + // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1. + if (x_u < 0xca00U) + return fputil::round_result_slightly_down( + fputil::cast(-0x1.ffcp-1)); + + // When x <= -12, round(2^x - 1, HP, RN) = -1. + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_DOWNWARD: + return FPBits::one(Sign::NEG).get_val(); + default: + return fputil::cast(-0x1.ffcp-1); + } + } + + // When |x| <= 2^(-3). + if (x_abs <= 0x3000U) { +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u); + LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + float xf = x; + // Degree-5 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]); + // > x * P; + return fputil::cast( + xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f, + 0x1.c6af88p-5f, 0x1.3b45d6p-7f, + 0x1.641e7cp-10f)); + } + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // exp2(x) = exp2(hi + mid) * exp2(lo) + auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x); + // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1 + return fputil::cast( + fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f)); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index c048a64db6bc2..e71300536616b 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1498,19 +1498,7 @@ add_entrypoint_object( HDRS ../exp2m1f16.h DEPENDS - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.common - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.cpu_features - libc.src.__support.math.expxf16_utils + libc.src.__support.math.exp2m1f16 ) add_entrypoint_object( diff --git a/libc/src/math/generic/exp2m1f16.cpp b/libc/src/math/generic/exp2m1f16.cpp index ce0cc60748f19..497a2887cea4c 100644 --- a/libc/src/math/generic/exp2m1f16.cpp +++ b/libc/src/math/generic/exp2m1f16.cpp @@ -7,163 +7,12 @@ //===----------------------------------------------------------------------===// #include "src/math/exp2m1f16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" 
-#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -#include "src/__support/macros/properties/cpu_features.h" -#include "src/__support/math/expxf16_utils.h" +#include "src/__support/math/exp2m1f16.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr fputil::ExceptValues EXP2M1F16_EXCEPTS_LO = {{ - // (input, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ) - {0x0b3dU, 0x0904U, 1U, 0U, 1U}, - // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ) - {0x0d3fU, 0x0b45U, 1U, 0U, 1U}, - // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ) - {0x118cU, 0x0fb1U, 1U, 0U, 0U}, - // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ) - {0x21bcU, 0x1ffaU, 1U, 0U, 1U}, - // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ) - {0x9718U, 0x94eaU, 0U, 1U, 0U}, - // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ) - {0x973fU, 0x9505U, 0U, 1U, 0U}, -}}; - -#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT -static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6; -#else -static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7; -#endif - -static constexpr fputil::ExceptValues - EXP2M1F16_EXCEPTS_HI = {{ - // (input, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ) - {0x3396U, 0x31b7U, 1U, 0U, 0U}, -#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT - // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ) - {0x34baU, 0x3345U, 1U, 0U, 0U}, -#endif - // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ) - {0x36b6U, 0x3566U, 1U, 0U, 0U}, -#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT - // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ) - {0x37b7U, 0x3659U, 1U, 0U, 1U}, -#endif - // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ) - {0xb201U, 0xafcdU, 0U, 1U, 1U}, - // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ) - {0xb3ccU, 0xb0f9U, 0U, 1U, 0U}, - // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ) - {0xb8a5U, 0xb54cU, 0U, 1U, 1U}, -#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT - // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ) - {0xba8dU, 0xb6edU, 0U, 1U, 1U}, -#endif - }}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - LLVM_LIBC_FUNCTION(float16, exp2m1f16, (float16 x)) { - using namespace math::expxf16_internal; - using FPBits = fputil::FPBits; - FPBits x_bits(x); - - uint16_t x_u = x_bits.uintval(); - uint16_t x_abs = x_u & 0x7fffU; - - // When |x| <= 2^(-3), or |x| >= 11, or x is NaN. - if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) { - // exp2m1(NaN) = NaN - if (x_bits.is_nan()) { - if (x_bits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // When x >= 16. - if (x_u >= 0x4c00 && x_bits.is_pos()) { - // exp2m1(+inf) = +inf - if (x_bits.is_inf()) - return FPBits::inf().get_val(); - - switch (fputil::quick_get_round()) { - case FE_TONEAREST: - case FE_UPWARD: - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); - return FPBits::inf().get_val(); - default: - return FPBits::max_normal().get_val(); - } - } - - // When x < -11. - if (x_u > 0xc980U) { - // exp2m1(-inf) = -1 - if (x_bits.is_inf()) - return FPBits::one(Sign::NEG).get_val(); - - // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1. 
- if (x_u < 0xca00U) - return fputil::round_result_slightly_down( - fputil::cast(-0x1.ffcp-1)); - - // When x <= -12, round(2^x - 1, HP, RN) = -1. - switch (fputil::quick_get_round()) { - case FE_TONEAREST: - case FE_DOWNWARD: - return FPBits::one(Sign::NEG).get_val(); - default: - return fputil::cast(-0x1.ffcp-1); - } - } - - // When |x| <= 2^(-3). - if (x_abs <= 0x3000U) { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u); - LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - float xf = x; - // Degree-5 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]); - // > x * P; - return fputil::cast( - xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f, - 0x1.c6af88p-5f, 0x1.3b45d6p-7f, - 0x1.641e7cp-10f)); - } - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // exp2(x) = exp2(hi + mid) * exp2(lo) - auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x); - // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1 - return fputil::cast( - fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f)); + return math::exp2m1f16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/UnitTest/FEnvSafeTest.cpp b/libc/test/UnitTest/FEnvSafeTest.cpp index 4393f9d5e5c3b..64f50d7be7fe3 100644 --- a/libc/test/UnitTest/FEnvSafeTest.cpp +++ b/libc/test/UnitTest/FEnvSafeTest.cpp @@ -43,7 +43,8 @@ void FEnvSafeTest::set_fenv(const fenv_t &fenv) { void FEnvSafeTest::expect_fenv_eq(const fenv_t &before_fenv, const fenv_t &after_fenv) { -#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && !defined(LIBC_COMPILER_IS_MSVC) +#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && !defined(LIBC_COMPILER_IS_MSVC) && \ + defined(__ARM_FP) using FPState = LIBC_NAMESPACE::fputil::FEnv::FPState; const FPState &before_state = reinterpret_cast(before_fenv); const FPState &after_state = reinterpret_cast(after_fenv); diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index aede395350821..762b5b0417ef6 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -44,6 +44,7 @@ add_fp_unittest( libc.src.__support.math.exp2f libc.src.__support.math.exp2f16 libc.src.__support.math.exp2m1f + libc.src.__support.math.exp2m1f16 libc.src.__support.math.exp10 libc.src.__support.math.exp10f libc.src.__support.math.exp10f16 diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index a6825a10654c9..5b409781a5b07 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -29,6 +29,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) { EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16)); EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp10m1f16(0.0f16)); EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp2f16(0.0f16)); + EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp2m1f16(0.0f16)); EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16)); ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(8.0f16, 5)); diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td index 84026aa9d3624..1c46965d995fe 100644 --- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td +++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td @@ -1192,4 +1192,42 @@ def 
int_loongarch_lasx_xvstelm_w def int_loongarch_lasx_xvstelm_d : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; + +// LASX and LSX conversion +def int_loongarch_lasx_cast_128_s + : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_cast_128_d + : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_cast_128 + : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_concat_128_s + : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_concat_128_d + : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_concat_128 + : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_lo_s + : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_lo_d + : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_lo + : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_hi_s + : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_hi_d + : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_hi + : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_lo_s + : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_lo_d + : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_lo + : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_hi_s + : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_hi_d + : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_hi + : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; } // TargetPrefix = "loongarch" diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index fe700e17d341b..cf4ffc82f6009 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -6630,6 +6630,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0), N->getOperand(1)); break; + case Intrinsic::loongarch_lasx_concat_128_s: + case Intrinsic::loongarch_lasx_concat_128_d: + case Intrinsic::loongarch_lasx_concat_128: + return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), + N->getOperand(1), N->getOperand(2)); } return SDValue(); } diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index b502b056c4cdf..00d52870f1727 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -2113,6 +2113,37 @@ defm : subvector_subreg_lowering; defm : subvector_subreg_lowering; defm : subvector_subreg_lowering; +// LASX and LSX conversion +def : Pat<(int_loongarch_lasx_cast_128_s (v4f32 LSX128:$src)), + (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_cast_128_d (v2f64 LSX128:$src)), + (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>; +def : 
Pat<(int_loongarch_lasx_cast_128 (v2i64 LSX128:$src)), + (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_lo_s (v8f32 LASX256:$src)), + (EXTRACT_SUBREG LASX256:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_lo_d (v4f64 LASX256:$src)), + (EXTRACT_SUBREG LASX256:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_lo (v4i64 LASX256:$src)), + (EXTRACT_SUBREG LASX256:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_hi_s (v8f32 LASX256:$src)), + (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_hi_d (v4f64 LASX256:$src)), + (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_hi (v4i64 LASX256:$src)), + (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>; +def : Pat<(int_loongarch_lasx_insert_128_lo_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>; +def : Pat<(int_loongarch_lasx_insert_128_lo_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>; +def : Pat<(int_loongarch_lasx_insert_128_lo (v4i64 LASX256:$src), (v2i64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>; +def : Pat<(int_loongarch_lasx_insert_128_hi_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>; +def : Pat<(int_loongarch_lasx_insert_128_hi_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>; +def : Pat<(int_loongarch_lasx_insert_128_hi (v4i64 LASX256:$src), (v2i64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>; } // Predicates = [HasExtLASX] /// Intrinsic pattern diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 0ff178e1f1959..e9088a4d9275c 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -58,6 +58,7 @@ add_llvm_target(RISCVCodeGen RISCVMoveMerger.cpp RISCVOptWInstrs.cpp RISCVPostRAExpandPseudoInsts.cpp + RISCVPromoteConstant.cpp RISCVPushPopOptimizer.cpp RISCVRedundantCopyElimination.cpp RISCVRegisterInfo.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index ae9410193efe1..51e8e8574ed15 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -20,6 +20,7 @@ namespace llvm { class FunctionPass; class InstructionSelector; +class ModulePass; class PassRegistry; class RISCVRegisterBankInfo; class RISCVSubtarget; @@ -111,6 +112,9 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &); FunctionPass *createRISCVPreLegalizerCombiner(); void initializeRISCVPreLegalizerCombinerPass(PassRegistry &); +ModulePass *createRISCVPromoteConstantPass(); +void initializeRISCVPromoteConstantPass(PassRegistry &); + FunctionPass *createRISCVVLOptimizerPass(); void initializeRISCVVLOptimizerPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp b/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp new file mode 100644 index 0000000000000..bf1f69f8e8d93 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp @@ -0,0 +1,213 @@ +//==- RISCVPromoteConstant.cpp - Promote constant fp to global for RISC-V --==// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-promote-const" +#define RISCV_PROMOTE_CONSTANT_NAME "RISC-V Promote Constants" + +STATISTIC(NumPromoted, "Number of constant literals promoted to globals"); +STATISTIC(NumPromotedUses, "Number of uses of promoted literal constants"); + +namespace { + +class RISCVPromoteConstant : public ModulePass { +public: + static char ID; + RISCVPromoteConstant() : ModulePass(ID) {} + + StringRef getPassName() const override { return RISCV_PROMOTE_CONSTANT_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + } + + /// Iterate over the functions and promote the double fp constants that + /// would otherwise go into the constant pool to a constant array. + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + // TargetMachine and Subtarget are needed to query isFPImmLegal. + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + const TargetMachine &TM = TPC.getTM<TargetMachine>(); + bool Changed = false; + for (Function &F : M) { + const RISCVSubtarget &ST = TM.getSubtarget<RISCVSubtarget>(F); + const RISCVTargetLowering *TLI = ST.getTargetLowering(); + Changed |= runOnFunction(F, TLI); + } + return Changed; + } + +private: + bool runOnFunction(Function &F, const RISCVTargetLowering *TLI); +}; +} // end anonymous namespace + +char RISCVPromoteConstant::ID = 0; + +INITIALIZE_PASS(RISCVPromoteConstant, DEBUG_TYPE, RISCV_PROMOTE_CONSTANT_NAME, + false, false) + +ModulePass *llvm::createRISCVPromoteConstantPass() { + return new RISCVPromoteConstant(); +} + +bool RISCVPromoteConstant::runOnFunction(Function &F, + const RISCVTargetLowering *TLI) { + if (F.hasOptNone() || F.hasOptSize()) + return false; + + // Bail out and make no transformation if the target doesn't support + // doubles, or if we're not targeting RV64 as we currently see some + // regressions for those targets. + if (!TLI->isTypeLegal(MVT::f64) || !TLI->isTypeLegal(MVT::i64)) + return false; + + // Collect all unique double constants and their uses in the function. Use + // MapVector to preserve insertion order. + MapVector<ConstantFP *, SmallVector<Use *>> ConstUsesMap; + + for (Instruction &I : instructions(F)) { + for (Use &U : I.operands()) { + auto *C = dyn_cast<ConstantFP>(U.get()); + if (!C || !C->getType()->isDoubleTy()) + continue; + // Do not promote if it wouldn't be loaded from the constant pool.
+ if (TLI->isFPImmLegal(C->getValueAPF(), MVT::f64, + /*ForCodeSize=*/false)) + continue; + // Do not promote a constant if it is used as an immediate argument + // for an intrinsic. + if (auto *II = dyn_cast<IntrinsicInst>(U.getUser())) { + Function *IntrinsicFunc = II->getCalledFunction(); + unsigned OperandIdx = U.getOperandNo(); + if (IntrinsicFunc && IntrinsicFunc->getAttributes().hasParamAttr( + OperandIdx, Attribute::ImmArg)) { + LLVM_DEBUG(dbgs() << "Skipping promotion of constant in: " << *II + << " because operand " << OperandIdx + << " must be an immediate.\n"); + continue; + } + } + // Note: FP args to inline asm would be problematic if we had a + // constraint that required an immediate floating point operand. At the + // time of writing LLVM doesn't recognise such a constraint. + ConstUsesMap[C].push_back(&U); + } + } + + int PromotableConstants = ConstUsesMap.size(); + LLVM_DEBUG(dbgs() << "Found " << PromotableConstants + << " promotable constants in " << F.getName() << "\n"); + // Bail out if no promotable constants found, or if only one is found. + if (PromotableConstants < 2) { + LLVM_DEBUG(dbgs() << "Performing no promotions as insufficient promotable " + "constants found\n"); + return false; + } + + NumPromoted += PromotableConstants; + + // Create a global array containing the promoted constants. + Module *M = F.getParent(); + Type *DoubleTy = Type::getDoubleTy(M->getContext()); + + SmallVector<Constant *> ConstantVector; + for (auto const &Pair : ConstUsesMap) + ConstantVector.push_back(Pair.first); + + ArrayType *ArrayTy = ArrayType::get(DoubleTy, ConstantVector.size()); + Constant *GlobalArrayInitializer = + ConstantArray::get(ArrayTy, ConstantVector); + + auto *GlobalArray = new GlobalVariable( + *M, ArrayTy, + /*isConstant=*/true, GlobalValue::InternalLinkage, GlobalArrayInitializer, + ".promoted_doubles." + F.getName()); + + // A cache to hold the loaded value for a given constant within a basic block. + DenseMap<std::pair<ConstantFP *, BasicBlock *>, Value *> LocalLoads; + + // Replace all uses with the loaded value. + unsigned Idx = 0; + for (auto const &Pair : ConstUsesMap) { + ConstantFP *Const = Pair.first; + const SmallVector<Use *> &Uses = Pair.second; + + for (Use *U : Uses) { + Instruction *UserInst = cast<Instruction>(U->getUser()); + BasicBlock *InsertionBB; + + // If the user is a PHI node, we must insert the load in the + // corresponding predecessor basic block. Otherwise, it's inserted into + // the same block as the use. + if (auto *PN = dyn_cast<PHINode>(UserInst)) + InsertionBB = PN->getIncomingBlock(*U); + else + InsertionBB = UserInst->getParent(); + + if (isa<CatchSwitchInst>(InsertionBB->getTerminator())) { + LLVM_DEBUG(dbgs() << "Bailing out: catchswitch means there is no valid " + "insertion point.\n"); + return false; + } + + auto CacheKey = std::make_pair(Const, InsertionBB); + Value *LoadedVal = nullptr; + + // Re-use a load if it exists in the insertion block. + if (LocalLoads.count(CacheKey)) { + LoadedVal = LocalLoads.at(CacheKey); + } else { + // Otherwise, create a new GEP and Load at the correct insertion point. + // It is always safe to insert in the first insertion point in the BB, + // so do that and let other passes reorder. + IRBuilder<> Builder(InsertionBB, InsertionBB->getFirstInsertionPt()); + Value *ElementPtr = Builder.CreateConstInBoundsGEP2_64( + GlobalArray->getValueType(), GlobalArray, 0, Idx, "double.addr"); + LoadedVal = Builder.CreateLoad(DoubleTy, ElementPtr, "double.val"); + + // Cache the newly created load for this block.
+ LocalLoads[CacheKey] = LoadedVal; + } + + U->set(LoadedVal); + ++NumPromotedUses; + } + ++Idx; + } + + return true; +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index ae54ff1515121..16ef67da83128 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -139,6 +139,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVExpandAtomicPseudoPass(*PR); initializeRISCVRedundantCopyEliminationPass(*PR); initializeRISCVAsmPrinterPass(*PR); + initializeRISCVPromoteConstantPass(*PR); } static Reloc::Model getEffectiveRelocModel(std::optional RM) { @@ -462,6 +463,8 @@ void RISCVPassConfig::addIRPasses() { } bool RISCVPassConfig::addPreISel() { + if (TM->getOptLevel() != CodeGenOptLevel::None) + addPass(createRISCVPromoteConstantPass()); if (TM->getOptLevel() != CodeGenOptLevel::None) { // Add a barrier before instruction selection so that we will not get // deleted block address after enabling default outlining. See D99707 for diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 2588c878d8472..9e65399e75dc7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -167,8 +167,7 @@ static bool sinkScalarOperands(VPlan &Plan) { if (!isa(Candidate)) return; - if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() || - Candidate->mayReadOrWriteMemory()) + if (Candidate->getParent() == SinkTo || cannotHoistOrSinkRecipe(*Candidate)) return; if (auto *RepR = dyn_cast(Candidate)) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll new file mode 100644 index 0000000000000..006713ccabf47 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll @@ -0,0 +1,303 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>) + +define void @lasx_cast_128_s(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_cast_128_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x float>, ptr %va + %b = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %a) + store <8 x float> %b, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>) + +define void @lasx_cast_128_d(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_cast_128_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x double>, ptr %va + %b = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %a) + store <4 x double> %b, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>) + +define void @lasx_cast_128(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_cast_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x i64>, ptr %va + %b = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %a) + store <4 x i64> %b, ptr %vd + ret void +} + +declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>) + +define void 
@lasx_concat_128_s(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_concat_128_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x float>, ptr %va + %b = load <4 x float>, ptr %vb + %c = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %a, <4 x float> %b) + store <8 x float> %c, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>) + +define void @lasx_concat_128_d(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_concat_128_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x double>, ptr %va + %b = load <2 x double>, ptr %vb + %c = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %a, <2 x double> %b) + store <4 x double> %c, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>) + +define void @lasx_concat_128(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_concat_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x i64>, ptr %va + %b = load <2 x i64>, ptr %vb + %c = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %a, <2 x i64> %b) + store <4 x i64> %c, ptr %vd + ret void +} + +declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>) + +define void @lasx_extract_128_lo_s(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_lo_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %c = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %a) + store <4 x float> %c, ptr %vd + ret void +} + +declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>) + +define void @lasx_extract_128_lo_d(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_lo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %c = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %a) + store <2 x double> %c, ptr %vd + ret void +} + +declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>) + +define void @lasx_extract_128_lo(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_lo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %a) + store <2 x i64> %c, ptr %vd + ret void +} + +declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>) + +define void @lasx_extract_128_hi_s(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_hi_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %c = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %a) + store <4 x float> %c, ptr %vd + ret void +} + +declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>) + +define void @lasx_extract_128_hi_d(ptr %vd, ptr 
%va) { +; CHECK-LABEL: lasx_extract_128_hi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %c = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %a) + store <2 x double> %c, ptr %vd + ret void +} + +declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>) + +define void @lasx_extract_128_hi(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_hi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %a) + store <2 x i64> %c, ptr %vd + ret void +} + +declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>) + +define void @lasx_insert_128_lo_s(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_lo_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %b = load <4 x float>, ptr %vb + %c = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %a, <4 x float> %b) + store <8 x float> %c, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>) + +define void @lasx_insert_128_lo_d(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_lo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %b = load <2 x double>, ptr %vb + %c = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %a, <2 x double> %b) + store <4 x double> %c, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>) + +define void @lasx_insert_128_lo(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_lo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %b = load <2 x i64>, ptr %vb + %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %a, <2 x i64> %b) + store <4 x i64> %c, ptr %vd + ret void +} + +declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>) + +define void @lasx_insert_128_hi_s(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_hi_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %b = load <4 x float>, ptr %vb + %c = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %a, <4 x float> %b) + store <8 x float> %c, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>) + +define void @lasx_insert_128_hi_d(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_hi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + 
%a = load <4 x double>, ptr %va + %b = load <2 x double>, ptr %vb + %c = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %a, <2 x double> %b) + store <4 x double> %c, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>) + +define void @lasx_insert_128_hi(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_hi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %b = load <2 x i64>, ptr %vb + %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %a, <2 x i64> %b) + store <4 x i64> %c, ptr %vd + ret void +} diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index ea08061221fd4..769823d1c4216 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -75,6 +75,7 @@ ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: RISC-V Promote Constants ; CHECK-NEXT: A No-Op Barrier Pass ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Merge internal globals diff --git a/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir b/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir new file mode 100644 index 0000000000000..7844589e3f93c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir @@ -0,0 +1,35 @@ +# RUN: llc %s -mtriple=riscv64 \ +# RUN: -run-pass=cfi-instr-inserter \ +# RUN: -riscv-enable-cfi-instr-inserter=true +# XFAIL: * + +# Technically, it is possible that a callee-saved register is saved in multiple different locations. +# CFIInstrInserter should handle this, but currently it does not. +--- +name: multiple_locations +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x9, $x2 + BEQ $x10, $x0, %bb.3 + PseudoBR %bb.2 + + bb.1: + liveins: $x10, $x9, $x2 + $x5 = COPY $x9 + CFI_INSTRUCTION register $x9, $x5 + $x9 = COPY $x5 + CFI_INSTRUCTION register $x9, $x9 + PseudoBR %bb.3 + + bb.2: + liveins: $x10, $x9, $x2 + SD $x9, $x2, 0 :: (store (s64)) + CFI_INSTRUCTION offset $x9, 0 + $x9 = LD $x2, 0 :: (load (s64)) + CFI_INSTRUCTION register $x9, $x9 + PseudoBR %bb.3 + + bb.3: + PseudoRET +... diff --git a/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll b/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll new file mode 100644 index 0000000000000..2bde6013b3640 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll @@ -0,0 +1,148 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt %s -S -riscv-promote-const -mtriple=riscv64 -mattr=+d | FileCheck %s + +; No promotion should take place, as the pass skips floats. +define float @multiple_floats(float %a, float %b) { +; CHECK-LABEL: define float @multiple_floats( +; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[A]], 1.000000e+00 +; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[B]], 2.000000e+00 +; CHECK-NEXT: [[SUM_F:%.*]] = fadd float [[ADD1]], [[ADD2]] +; CHECK-NEXT: ret float [[SUM_F]] +; +entry: + %add1 = fadd float %a, 1.0 + %add2 = fadd float %b, 2.0 + %sum_f = fadd float %add1, %add2 + ret float %sum_f +} + +; No promotion should take place as cases with a single constant are skipped. 
+define double @single_double(double %a) { +; CHECK-LABEL: define double @single_double( +; CHECK-SAME: double [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[A]], 4.210000e+01 +; CHECK-NEXT: ret double [[ADD]] +; +entry: + %add = fadd double %a, 42.1 + ret double %add +} + +; Promotion should happen as we have at least two unique constants that would +; otherwise go in the constant pool. +define double @multiple_doubles(double %a, double %b) { +; CHECK-LABEL: define double @multiple_doubles( +; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DOUBLE_VAL1:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles, i64 0, i64 1), align 8 +; CHECK-NEXT: [[ADD3:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles, align 8 +; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[A]], [[ADD3]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd double [[B]], [[DOUBLE_VAL1]] +; CHECK-NEXT: [[SUM:%.*]] = fadd double [[ADD2]], [[ADD3]] +; CHECK-NEXT: [[SUM1:%.*]] = fadd double [[ADD4]], [[SUM]] +; CHECK-NEXT: ret double [[SUM1]] +; +entry: + %add1 = fadd double %a, 2.718 + %add2 = fadd double %b, 42.1 + %add3 = fadd double %add1, 2.718 + %sum = fadd double %add2, %add3 + ret double %sum +} + +; Promotion should not happen as the constants will be materialised rather +; than using the constant pool. +define double @multiple_doubles_no_promote(double %a, double %b) { +; CHECK-LABEL: define double @multiple_doubles_no_promote( +; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[A]], 1.000000e+00 +; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[B]], 2.000000e+00 +; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[ADD1]], 1.000000e+00 +; CHECK-NEXT: [[SUM:%.*]] = fadd double [[ADD2]], [[ADD3]] +; CHECK-NEXT: ret double [[SUM]] +; +entry: + %add1 = fadd double %a, 1.0 + %add2 = fadd double %b, 2.0 + %add3 = fadd double %add1, 1.0 + %sum = fadd double %add2, %add3 + ret double %sum +} + +; The same constant shouldn't be loaded more than once per BB. 
+define double @multiple_doubles_multi_bb(double %a, i1 %cond) { +; CHECK-LABEL: define double @multiple_doubles_multi_bb( +; CHECK-SAME: double [[A:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK: [[IF_TRUE]]: +; CHECK-NEXT: [[DOUBLE_VAL2:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_multi_bb, i64 0, i64 1), align 8 +; CHECK-NEXT: [[DOUBLE_VAL:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_multi_bb, align 8 +; CHECK-NEXT: [[ADD_T:%.*]] = fadd double [[A]], [[DOUBLE_VAL]] +; CHECK-NEXT: [[MUL_T:%.*]] = fmul double [[ADD_T]], [[DOUBLE_VAL2]] +; CHECK-NEXT: [[SUB_T:%.*]] = fsub double [[MUL_T]], [[DOUBLE_VAL]] +; CHECK-NEXT: br label %[[IF_END:.*]] +; CHECK: [[IF_FALSE]]: +; CHECK-NEXT: [[DOUBLE_VAL3:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_multi_bb, i64 0, i64 1), align 8 +; CHECK-NEXT: [[DOUBLE_VAL1:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_multi_bb, align 8 +; CHECK-NEXT: [[ADD_F:%.*]] = fadd double [[A]], [[DOUBLE_VAL1]] +; CHECK-NEXT: [[MUL_F:%.*]] = fmul double [[ADD_F]], [[DOUBLE_VAL3]] +; CHECK-NEXT: [[SUB_F:%.*]] = fsub double [[MUL_F]], [[DOUBLE_VAL1]] +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi double [ [[SUB_T]], %[[IF_TRUE]] ], [ [[SUB_F]], %[[IF_FALSE]] ] +; CHECK-NEXT: ret double [[PHI_RES]] +; +entry: + br i1 %cond, label %if.true, label %if.false + +if.true: + %add.t = fadd double %a, 1.23 + %mul.t = fmul double %add.t, 4.56 + %sub.t = fsub double %mul.t, 1.23 + br label %if.end + +if.false: + %add.f = fadd double %a, 1.23 + %mul.f = fmul double %add.f, 4.56 + %sub.f = fsub double %mul.f, 1.23 + br label %if.end + +if.end: + %phi.res = phi double [ %sub.t, %if.true ], [ %sub.f, %if.false ] + ret double %phi.res +} + +; Check the insertion point in the case we have a phi taking a constant C and +; the source block also uses that same constant. 
+define double @multiple_doubles_phi(double %a, i1 %cond) { +; CHECK-LABEL: define double @multiple_doubles_phi( +; CHECK-SAME: double [[A:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[DOUBLE_VAL:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_phi, align 8 +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A]], [[DOUBLE_VAL]] +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[PHI_VAL:%.*]] = phi double [ [[DOUBLE_VAL]], %[[IF_THEN]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[DOUBLE_VAL1:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_phi, i64 0, i64 1), align 8 +; CHECK-NEXT: [[RES:%.*]] = fadd double [[PHI_VAL]], [[DOUBLE_VAL1]] +; CHECK-NEXT: ret double [[RES]] +; +entry: + br i1 %cond, label %if.then, label %if.end + +if.then: + %mul = fmul double %a, 1.23 + br label %if.end + +if.end: + %phi.val = phi double [ 1.23, %if.then ], [ %a, %entry ] + %res = fadd double %phi.val, 4.56 + ret double %res +} diff --git a/llvm/test/CodeGen/X86/pr166534.ll b/llvm/test/CodeGen/X86/pr166534.ll new file mode 100644 index 0000000000000..aef44cc3e40d0 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr166534.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE2 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE4 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512 + +define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) { +; SSE2-LABEL: pr166534: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: movq 8(%rdi), %r8 +; SSE2-NEXT: movdqu (%rdi), %xmm0 +; SSE2-NEXT: movq (%rsi), %r9 +; SSE2-NEXT: movq 8(%rsi), %rdi +; SSE2-NEXT: movdqu (%rsi), %xmm1 +; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE2-NEXT: pmovmskb %xmm1, %esi +; SSE2-NEXT: xorl %r10d, %r10d +; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF +; SSE2-NEXT: sete %r10b +; SSE2-NEXT: orq %r10, (%rdx) +; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF +; SSE2-NEXT: jne .LBB0_2 +; SSE2-NEXT: # %bb.1: # %if.then +; SSE2-NEXT: xorq %r9, %rax +; SSE2-NEXT: xorq %rdi, %r8 +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: orq %rax, %r8 +; SSE2-NEXT: sete %dl +; SSE2-NEXT: orq %rdx, (%rcx) +; SSE2-NEXT: .LBB0_2: # %if.end +; SSE2-NEXT: retq +; +; SSE4-LABEL: pr166534: +; SSE4: # %bb.0: # %entry +; SSE4-NEXT: movq (%rdi), %rax +; SSE4-NEXT: movq 8(%rdi), %r8 +; SSE4-NEXT: movdqu (%rdi), %xmm0 +; SSE4-NEXT: movq (%rsi), %r9 +; SSE4-NEXT: movq 8(%rsi), %rdi +; SSE4-NEXT: movdqu (%rsi), %xmm1 +; SSE4-NEXT: pxor %xmm0, %xmm1 +; SSE4-NEXT: xorl %esi, %esi +; SSE4-NEXT: ptest %xmm1, %xmm1 +; SSE4-NEXT: sete %sil +; SSE4-NEXT: orq %rsi, (%rdx) +; SSE4-NEXT: ptest %xmm1, %xmm1 +; SSE4-NEXT: jne .LBB0_2 +; SSE4-NEXT: # %bb.1: # %if.then +; SSE4-NEXT: xorq %r9, %rax +; SSE4-NEXT: xorq %rdi, %r8 +; SSE4-NEXT: xorl %edx, %edx +; SSE4-NEXT: orq %rax, %r8 +; SSE4-NEXT: sete %dl +; SSE4-NEXT: orq %rdx, (%rcx) +; SSE4-NEXT: .LBB0_2: # %if.end +; SSE4-NEXT: retq +; +; AVX2-LABEL: pr166534: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: movq (%rdi), %rax +; AVX2-NEXT: movq 8(%rdi), %r8 +; AVX2-NEXT: vmovdqu (%rdi), %xmm0 +; AVX2-NEXT: movq (%rsi), %rdi +; AVX2-NEXT: vpxor (%rsi), 
%xmm0, %xmm0 +; AVX2-NEXT: movq 8(%rsi), %rsi +; AVX2-NEXT: xorl %r9d, %r9d +; AVX2-NEXT: vptest %xmm0, %xmm0 +; AVX2-NEXT: sete %r9b +; AVX2-NEXT: orq %r9, (%rdx) +; AVX2-NEXT: vptest %xmm0, %xmm0 +; AVX2-NEXT: jne .LBB0_2 +; AVX2-NEXT: # %bb.1: # %if.then +; AVX2-NEXT: xorq %rdi, %rax +; AVX2-NEXT: xorq %rsi, %r8 +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: orq %rax, %r8 +; AVX2-NEXT: sete %dl +; AVX2-NEXT: orq %rdx, (%rcx) +; AVX2-NEXT: .LBB0_2: # %if.end +; AVX2-NEXT: retq +; +; AVX512-LABEL: pr166534: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: movq (%rdi), %rax +; AVX512-NEXT: movq 8(%rdi), %r8 +; AVX512-NEXT: vmovdqu (%rdi), %xmm0 +; AVX512-NEXT: movq (%rsi), %r9 +; AVX512-NEXT: movq 8(%rsi), %rdi +; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: vptest %xmm0, %xmm0 +; AVX512-NEXT: sete %sil +; AVX512-NEXT: orq %rsi, (%rdx) +; AVX512-NEXT: vptest %xmm0, %xmm0 +; AVX512-NEXT: jne .LBB0_2 +; AVX512-NEXT: # %bb.1: # %if.then +; AVX512-NEXT: xorq %r9, %rax +; AVX512-NEXT: xorq %rdi, %r8 +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: orq %rax, %r8 +; AVX512-NEXT: sete %dl +; AVX512-NEXT: orq %rdx, (%rcx) +; AVX512-NEXT: .LBB0_2: # %if.end +; AVX512-NEXT: retq +entry: + %a = load i128, ptr %pa, align 8 + %b = load i128, ptr %pb, align 8 + %cmp = icmp eq i128 %a, %b + %conv1 = zext i1 %cmp to i128 + %c = load i128, ptr %pc, align 8 + %or = or i128 %c, %conv1 + store i128 %or, ptr %pc, align 8 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %d = load i128, ptr %pd, align 8 + %or7 = or i128 %d, %conv1 + store i128 %or7, ptr %pd, align 8 + br label %if.end + +if.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll index a852b731ea13b..9e523be618b44 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll @@ -12,12 +12,15 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56) +; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP18]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -26,7 +29,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK: [[PRED_STORE_IF1]]: ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: 
[[TMP8:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: @@ -35,7 +38,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK: [[PRED_STORE_IF3]]: ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; CHECK: [[PRED_STORE_CONTINUE4]]: @@ -44,7 +47,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK: [[PRED_STORE_IF5]]: ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn index 2ece91331c5d8..11a57fcb008cd 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn @@ -12,10 +12,17 @@ tablegen("WebAssemblyGenFastISel") { td_file = "WebAssembly.td" } +tablegen("WebAssemblyGenSDNodeInfo") { + visibility = [ ":LLVMWebAssemblyCodeGen" ] + args = [ "-gen-sd-node-info" ] + td_file = "WebAssembly.td" +} + static_library("LLVMWebAssemblyCodeGen") { deps = [ ":WebAssemblyGenDAGISel", ":WebAssemblyGenFastISel", + ":WebAssemblyGenSDNodeInfo", "MCTargetDesc", "TargetInfo", "//llvm/include/llvm/Config:llvm-config", diff --git a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h index 964281592cc65..cad6cec761ab8 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h @@ -92,12 +92,43 @@ class VectorConvertToLLVMPattern : public ConvertOpToLLVMPattern<SourceOp> { using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern; using Super = VectorConvertToLLVMPattern<SourceOp, TargetOp, AttrConvert>; + /// Return the given type if it's a floating point type. If the given type is + /// a vector type, return its element type if it's a floating point type. + static FloatType getFloatingPointType(Type type) { + if (auto floatType = dyn_cast<FloatType>(type)) + return floatType; + if (auto vecType = dyn_cast<VectorType>(type)) + return dyn_cast<FloatType>(vecType.getElementType()); + return nullptr; + } + LogicalResult matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { static_assert( std::is_base_of<OpTrait::OneResult<SourceOp>, SourceOp>::value, "expected single result op"); + + // The pattern should not apply if a floating-point operand is converted to + // a non-floating-point type. This indicates that the floating point type + // is not supported by the LLVM lowering. (Such types are converted to + // integers.)
+ auto checkType = [&](Value v) -> LogicalResult { + FloatType floatType = getFloatingPointType(v.getType()); + if (!floatType) + return success(); + Type convertedType = this->getTypeConverter()->convertType(floatType); + if (!isa_and_nonnull<FloatType>(convertedType)) + return rewriter.notifyMatchFailure(op, + "unsupported floating point type"); + return success(); + }; + for (Value operand : op->getOperands()) + if (failed(checkType(operand))) + return failure(); + if (failed(checkType(op->getResult(0)))) + return failure(); + // Determine attributes for the target op AttrConvert<SourceOp, TargetOp> attrConvert(op); diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index ed7e2a08ebfd9..5ac9e26e8636d 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -981,6 +981,28 @@ class ConversionPatternRewriter final : public PatternRewriter { /// Return a reference to the internal implementation. detail::ConversionPatternRewriterImpl &getImpl(); + /// Attempt to legalize the given operation. This can be used within + /// conversion patterns to change the default pre-order legalization order. + /// Returns "success" if the operation was legalized, "failure" otherwise. + /// + /// Note: In a partial conversion, this function returns "success" even if + /// the operation could not be legalized, as long as it was not explicitly + /// marked as illegal in the conversion target. + LogicalResult legalize(Operation *op); + + /// Attempt to legalize the given region. This can be used within + /// conversion patterns to change the default pre-order legalization order. + /// Returns "success" if the region was legalized, "failure" otherwise. + /// + /// If the current pattern runs with a type converter, the entry block + /// signature will be converted before legalizing the operations in the + /// region. + /// + /// Note: In a partial conversion, this function returns "success" even if + /// an operation could not be legalized, as long as it was not explicitly + /// marked as illegal in the conversion target. + LogicalResult legalize(Region *r); + private: // Allow OperationConverter to construct new rewriters. friend struct OperationConverter; @@ -989,7 +1011,8 @@ class ConversionPatternRewriter final : public PatternRewriter { /// conversions. They apply some IR rewrites in a delayed fashion and could /// bring the IR into an inconsistent state when used standalone. explicit ConversionPatternRewriter(MLIRContext *ctx, - const ConversionConfig &config); + const ConversionConfig &config, + OperationConverter &converter); // Hide unsupported pattern rewriter API. using OpBuilder::setListener; diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 2fe06970eb568..f8c38fadbd229 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -92,6 +92,22 @@ static OpBuilder::InsertPoint computeInsertPoint(ArrayRef<Value> vals) { return pt; } +namespace { +enum OpConversionMode { + /// In this mode, the conversion will ignore failed conversions to allow + /// illegal operations to co-exist in the IR. + Partial, + + /// In this mode, all operations must be legal for the given target for the + /// conversion to succeed. + Full, + + /// In this mode, operations are analyzed for legality. No actual rewrites are + /// applied to the operations on success.
+ Analysis, +}; +} // namespace + //===----------------------------------------------------------------------===// // ConversionValueMapping //===----------------------------------------------------------------------===// @@ -866,8 +882,9 @@ namespace mlir { namespace detail { struct ConversionPatternRewriterImpl : public RewriterBase::Listener { explicit ConversionPatternRewriterImpl(ConversionPatternRewriter &rewriter, - const ConversionConfig &config) - : rewriter(rewriter), config(config), + const ConversionConfig &config, + OperationConverter &opConverter) + : rewriter(rewriter), config(config), opConverter(opConverter), notifyingRewriter(rewriter.getContext(), config.listener) {} //===--------------------------------------------------------------------===// @@ -1124,6 +1141,9 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { /// Dialect conversion configuration. const ConversionConfig &config; + /// The operation converter to use for recursive legalization. + OperationConverter &opConverter; + /// A set of erased operations. This set is utilized only if /// `allowPatternRollback` is set to "false". Conceptually, this set is /// similar to `replacedOps` (which is maintained when the flag is set to @@ -2084,9 +2104,10 @@ void ConversionPatternRewriterImpl::notifyMatchFailure( //===----------------------------------------------------------------------===// ConversionPatternRewriter::ConversionPatternRewriter( - MLIRContext *ctx, const ConversionConfig &config) - : PatternRewriter(ctx), - impl(new detail::ConversionPatternRewriterImpl(*this, config)) { + MLIRContext *ctx, const ConversionConfig &config, + OperationConverter &opConverter) + : PatternRewriter(ctx), impl(new detail::ConversionPatternRewriterImpl( + *this, config, opConverter)) { setListener(impl.get()); } @@ -2207,6 +2228,37 @@ ConversionPatternRewriter::getRemappedValues(ValueRange keys, return success(); } +LogicalResult ConversionPatternRewriter::legalize(Region *r) { + // Fast path: If the region is empty, there is nothing to legalize. + if (r->empty()) + return success(); + + // Gather a list of all operations to legalize. This is done before + // converting the entry block signature because unrealized_conversion_cast + // ops should not be included. + SmallVector ops; + for (Block &b : *r) + for (Operation &op : b) + ops.push_back(&op); + + // If the current pattern runs with a type converter, convert the entry block + // signature. + if (const TypeConverter *converter = impl->currentTypeConverter) { + std::optional conversion = + converter->convertBlockSignature(&r->front()); + if (!conversion) + return failure(); + applySignatureConversion(&r->front(), *conversion, converter); + } + + // Legalize all operations in the region. + for (Operation *op : ops) + if (failed(legalize(op))) + return failure(); + + return success(); +} + void ConversionPatternRewriter::inlineBlockBefore(Block *source, Block *dest, Block::iterator before, ValueRange argValues) { @@ -3192,22 +3244,6 @@ static void reconcileUnrealizedCasts( // OperationConverter //===----------------------------------------------------------------------===// -namespace { -enum OpConversionMode { - /// In this mode, the conversion will ignore failed conversions to allow - /// illegal operations to co-exist in the IR. - Partial, - - /// In this mode, all operations must be legal for the given target for the - /// conversion to succeed. - Full, - - /// In this mode, operations are analyzed for legality. 
No actual rewrites are - /// applied to the operations on success. - Analysis, -}; -} // namespace - namespace mlir { // This class converts operations to a given conversion target via a set of // rewrite patterns. The conversion behaves differently depending on the @@ -3217,16 +3253,20 @@ struct OperationConverter { const FrozenRewritePatternSet &patterns, const ConversionConfig &config, OpConversionMode mode) - : rewriter(ctx, config), opLegalizer(rewriter, target, patterns), + : rewriter(ctx, config, *this), opLegalizer(rewriter, target, patterns), mode(mode) {} /// Converts the given operations to the conversion target. LogicalResult convertOperations(ArrayRef ops); -private: - /// Converts an operation with the given rewriter. - LogicalResult convert(Operation *op); + /// Converts a single operation. If `isRecursiveLegalization` is "true", the + /// conversion is a recursive legalization request, triggered from within a + /// pattern. In that case, do not emit errors because there will be another + /// attempt at legalizing the operation later (via the regular pre-order + /// legalization mechanism). + LogicalResult convert(Operation *op, bool isRecursiveLegalization = false); +private: /// The rewriter to use when converting operations. ConversionPatternRewriter rewriter; @@ -3238,32 +3278,42 @@ struct OperationConverter { }; } // namespace mlir -LogicalResult OperationConverter::convert(Operation *op) { +LogicalResult ConversionPatternRewriter::legalize(Operation *op) { + return impl->opConverter.convert(op, /*isRecursiveLegalization=*/true); +} + +LogicalResult OperationConverter::convert(Operation *op, + bool isRecursiveLegalization) { const ConversionConfig &config = rewriter.getConfig(); // Legalize the given operation. if (failed(opLegalizer.legalize(op))) { // Handle the case of a failed conversion for each of the different modes. // Full conversions expect all operations to be converted. - if (mode == OpConversionMode::Full) - return op->emitError() - << "failed to legalize operation '" << op->getName() << "'"; + if (mode == OpConversionMode::Full) { + if (!isRecursiveLegalization) + op->emitError() << "failed to legalize operation '" << op->getName() + << "'"; + return failure(); + } // Partial conversions allow conversions to fail iff the operation was not // explicitly marked as illegal. If the user provided a `unlegalizedOps` // set, non-legalizable ops are added to that set. if (mode == OpConversionMode::Partial) { - if (opLegalizer.isIllegal(op)) - return op->emitError() - << "failed to legalize operation '" << op->getName() - << "' that was explicitly marked illegal"; - if (config.unlegalizedOps) + if (opLegalizer.isIllegal(op)) { + if (!isRecursiveLegalization) + op->emitError() << "failed to legalize operation '" << op->getName() + << "' that was explicitly marked illegal"; + return failure(); + } + if (config.unlegalizedOps && !isRecursiveLegalization) config.unlegalizedOps->insert(op); } } else if (mode == OpConversionMode::Analysis) { // Analysis conversions don't fail if any operations fail to legalize, // they are only interested in the operations that were successfully // legalized. 
- if (config.legalizableOps) + if (config.legalizableOps && !isRecursiveLegalization) config.legalizableOps->insert(op); } return success(); diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index ba12ff29ebef9..b5dcb01d3dc6b 100644 --- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -747,3 +747,29 @@ func.func @memref_bitcast(%1: memref) -> memref { %2 = arith.bitcast %1 : memref to memref func.return %2 : memref } + +// ----- + +// CHECK-LABEL: func @unsupported_fp_type +// CHECK: arith.addf {{.*}} : f4E2M1FN +// CHECK: arith.addf {{.*}} : vector<4xf4E2M1FN> +// CHECK: arith.addf {{.*}} : vector<8x4xf4E2M1FN> +func.func @unsupported_fp_type(%arg0: f4E2M1FN, %arg1: vector<4xf4E2M1FN>, %arg2: vector<8x4xf4E2M1FN>) -> (f4E2M1FN, vector<4xf4E2M1FN>, vector<8x4xf4E2M1FN>) { + %0 = arith.addf %arg0, %arg0 : f4E2M1FN + %1 = arith.addf %arg1, %arg1 : vector<4xf4E2M1FN> + %2 = arith.addf %arg2, %arg2 : vector<8x4xf4E2M1FN> + return %0, %1, %2 : f4E2M1FN, vector<4xf4E2M1FN>, vector<8x4xf4E2M1FN> +} + +// ----- + +// CHECK-LABEL: func @supported_fp_type +// CHECK: llvm.fadd {{.*}} : f32 +// CHECK: llvm.fadd {{.*}} : vector<4xf32> +// CHECK-COUNT-4: llvm.fadd {{.*}} : vector<8xf32> +func.func @supported_fp_type(%arg0: f32, %arg1: vector<4xf32>, %arg2: vector<4x8xf32>) -> (f32, vector<4xf32>, vector<4x8xf32>) { + %0 = arith.addf %arg0, %arg0 : f32 + %1 = arith.addf %arg1, %arg1 : vector<4xf32> + %2 = arith.addf %arg2, %arg2 : vector<4x8xf32> + return %0, %1, %2 : f32, vector<4xf32>, vector<4x8xf32> +} diff --git a/mlir/test/Transforms/test-legalizer-full.mlir b/mlir/test/Transforms/test-legalizer-full.mlir index 42cec68b9fbbb..8da9109a32762 100644 --- a/mlir/test/Transforms/test-legalizer-full.mlir +++ b/mlir/test/Transforms/test-legalizer-full.mlir @@ -72,3 +72,21 @@ builtin.module { } } + +// ----- + +// The region of "test.post_order_legalization" is converted before the op. + +// expected-remark@+1 {{applyFullConversion failed}} +builtin.module { +func.func @test_preorder_legalization() { + // expected-error@+1 {{failed to legalize operation 'test.post_order_legalization'}} + "test.post_order_legalization"() ({ + ^bb0(%arg0: i64): + // Not-explicitly-legal ops are not allowed to survive. 
+ "test.remaining_consumer"(%arg0) : (i64) -> () + "test.invalid"(%arg0) : (i64) -> () + }) : () -> () + return +} +} diff --git a/mlir/test/Transforms/test-legalizer-rollback.mlir b/mlir/test/Transforms/test-legalizer-rollback.mlir index 71e11782e14b0..4bcca6b7e5228 100644 --- a/mlir/test/Transforms/test-legalizer-rollback.mlir +++ b/mlir/test/Transforms/test-legalizer-rollback.mlir @@ -163,3 +163,22 @@ func.func @create_unregistered_op_in_pattern() -> i32 { "test.return"(%0) : (i32) -> () } } + +// ----- + +// CHECK-LABEL: func @test_failed_preorder_legalization +// CHECK: "test.post_order_legalization"() ({ +// CHECK: %[[r:.*]] = "test.illegal_op_g"() : () -> i32 +// CHECK: "test.return"(%[[r]]) : (i32) -> () +// CHECK: }) : () -> () +// expected-remark @+1 {{applyPartialConversion failed}} +module { +func.func @test_failed_preorder_legalization() { + // expected-error @+1 {{failed to legalize operation 'test.post_order_legalization' that was explicitly marked illegal}} + "test.post_order_legalization"() ({ + %0 = "test.illegal_op_g"() : () -> (i32) + "test.return"(%0) : (i32) -> () + }) : () -> () + return +} +} diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index 7c43bb7bface0..88a71cc26ab0c 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -448,3 +448,35 @@ func.func @test_working_1to1_pattern(%arg0: f16) { "test.type_consumer"(%arg0) : (f16) -> () "test.return"() : () -> () } + +// ----- + +// The region of "test.post_order_legalization" is converted before the op. + +// CHECK: notifyBlockInserted into test.post_order_legalization: was unlinked +// CHECK: notifyOperationInserted: test.invalid +// CHECK: notifyBlockErased +// CHECK: notifyOperationInserted: test.valid, was unlinked +// CHECK: notifyOperationReplaced: test.invalid +// CHECK: notifyOperationErased: test.invalid +// CHECK: notifyOperationModified: test.post_order_legalization + +// CHECK-LABEL: func @test_preorder_legalization +// CHECK: "test.post_order_legalization"() ({ +// CHECK: ^{{.*}}(%[[arg0:.*]]: f64): +// Note: The survival of a not-explicitly-invalid operation does *not* cause +// a conversion failure in when applying a partial conversion. 
+// CHECK: %[[cast:.*]] = "test.cast"(%[[arg0]]) : (f64) -> i64
+// CHECK: "test.remaining_consumer"(%[[cast]]) : (i64) -> ()
+// CHECK: "test.valid"(%[[arg0]]) : (f64) -> ()
+// CHECK: }) {is_legal} : () -> ()
+func.func @test_preorder_legalization() {
+  "test.post_order_legalization"() ({
+  ^bb0(%arg0: i64):
+    // expected-remark @+1 {{'test.remaining_consumer' is not legalizable}}
+    "test.remaining_consumer"(%arg0) : (i64) -> ()
+    "test.invalid"(%arg0) : (i64) -> ()
+  }) : () -> ()
+  // expected-remark @+1 {{'func.return' is not legalizable}}
+  return
+}
diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
index bba397e2e58c0..a38cfa848de4b 100644
--- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp
+++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
@@ -1419,6 +1419,22 @@ class TestTypeConsumerOpPattern
   }
 };
+class TestPostOrderLegalization : public ConversionPattern {
+public:
+  TestPostOrderLegalization(MLIRContext *ctx, const TypeConverter &converter)
+      : ConversionPattern(converter, "test.post_order_legalization", 1, ctx) {}
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    for (Region &r : op->getRegions())
+      if (failed(rewriter.legalize(&r)))
+        return failure();
+    rewriter.modifyOpInPlace(
+        op, [&]() { op->setAttr("is_legal", rewriter.getUnitAttr()); });
+    return success();
+  }
+};
+
 /// Test unambiguous overload resolution of replaceOpWithMultiple. This
 /// function is just to trigger compiler errors. It is never executed.
 [[maybe_unused]] void testReplaceOpWithMultipleOverloads(
@@ -1533,7 +1549,8 @@ struct TestLegalizePatternDriver
     patterns.add(&getContext(), converter);
+                 TestTypeConsumerOpPattern, TestPostOrderLegalization>(
+        &getContext(), converter);
     patterns.add(converter, &getContext());
     mlir::populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, converter);
@@ -1561,6 +1578,9 @@ struct TestLegalizePatternDriver
     target.addDynamicallyLegalOp(
         OperationName("test.value_replace", &getContext()),
         [](Operation *op) { return op->hasAttr("is_legal"); });
+    target.addDynamicallyLegalOp(
+        OperationName("test.post_order_legalization", &getContext()),
+        [](Operation *op) { return op->hasAttr("is_legal"); });
     // TestCreateUnregisteredOp creates `arith.constant` operation,
     // which was not added to target intentionally to test
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 0888ebd7a9362..cb08397c201f2 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -927,9 +927,24 @@ static void runIslScheduleOptimizer(
     walkScheduleTreeForStatistics(Schedule, 2);
   }
+  // Check why the computation could have failed.
   if (MaxOpGuard.hasQuotaExceeded()) {
     POLLY_DEBUG(dbgs() << "Schedule optimizer calculation exceeds ISL quota\n");
     return;
+  } else if (isl_ctx_last_error(Ctx) != isl_error_none) {
+    const char *File = isl_ctx_last_error_file(Ctx);
+    int Line = isl_ctx_last_error_line(Ctx);
+    const char *Msg = isl_ctx_last_error_msg(Ctx);
+    POLLY_DEBUG(
+        dbgs()
+        << "ISL reported an error during the computation of a new schedule at "
+        << File << ":" << Line << ": " << Msg << "\n");
+    isl_ctx_reset_error(Ctx);
+    return;
+  } else if (Schedule.is_null()) {
+    POLLY_DEBUG(dbgs() << "Schedule optimizer did not compute a new schedule "
+                          "for unknown reasons\n");
+    return;
   }
   // Skip profitability check if user transformation(s) have been applied.
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 5a1e0b53b021c..8d225d63cdf3e 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2964,6 +2964,22 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_math_exp2m1f16", + hdrs = ["src/__support/math/exp2m1f16.h"], + deps = [ + ":__support_fputil_except_value_utils", + ":__support_fputil_fma", + ":__support_fputil_multiply_add", + ":__support_fputil_nearest_integer", + ":__support_fputil_polyeval", + ":__support_fputil_rounding_mode", + ":__support_macros_optimization", + ":__support_math_common_constants", + ":__support_math_expxf16_utils", + ], +) + libc_support_library( name = "__support_math_exp10", hdrs = ["src/__support/math/exp10.h"], @@ -3762,7 +3778,7 @@ libc_math_function( libc_math_function( name = "exp2m1f16", additional_deps = [ - ":__support_math_expxf16_utils", + ":__support_math_exp2m1f16", ], )