diff --git a/amd/comgr/src/comgr-compiler.cpp b/amd/comgr/src/comgr-compiler.cpp index 822e4b3ce512f..358e8af87f4c8 100644 --- a/amd/comgr/src/comgr-compiler.cpp +++ b/amd/comgr/src/comgr-compiler.cpp @@ -1129,7 +1129,7 @@ amd_comgr_status_t AMDGPUCompiler::addDeviceLibraries() { SmallString<256> ClangBinaryPath(env::getLLVMPath()); sys::path::append(ClangBinaryPath, "bin", "clang"); - std::string ClangResourceDir = Driver::GetResourcesPath(ClangBinaryPath); + std::string ClangResourceDir = GetResourcesPath(ClangBinaryPath); SmallString<256> DeviceLibPath(ClangResourceDir); sys::path::append(DeviceLibPath, "lib"); diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp index 439d72a343ce8..7f2a2100363e9 100644 --- a/bolt/unittests/Core/MCPlusBuilder.cpp +++ b/bolt/unittests/Core/MCPlusBuilder.cpp @@ -171,9 +171,11 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) { ASSERT_EQ(II->getOperand(0).getImm(), 34); ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false)); +#ifndef NDEBUG MCInst BTIinvalid; ASSERT_DEATH(BC->MIB->createBTI(BTIinvalid, false, false), "No target kinds!"); +#endif MCInst Paciasp = MCInstBuilder(AArch64::PACIASP); II = BB->addInstruction(Paciasp); diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 7990f2719e9a0..4eda330716f21 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -132,8 +132,7 @@ std::optional detectSysroot() { std::string detectStandardResourceDir() { static int StaticForMainAddr; // Just an address in this process. - return CompilerInvocation::GetResourcesPath("clangd", - (void *)&StaticForMainAddr); + return GetResourcesPath("clangd", (void *)&StaticForMainAddr); } // The path passed to argv[0] is important: diff --git a/clang-tools-extra/clangd/Compiler.cpp b/clang-tools-extra/clangd/Compiler.cpp index 6ebc2eac25745..9ea7df139382a 100644 --- a/clang-tools-extra/clangd/Compiler.cpp +++ b/clang-tools-extra/clangd/Compiler.cpp @@ -9,6 +9,7 @@ #include "Compiler.h" #include "support/Logger.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/Serialization/PCHContainerOperations.h" diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index f7e6061044c6d..e7ca7b0bd0792 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -580,6 +580,8 @@ implementation. | need_device_addr modifier for adjust_args clause | :part:`partial` | :none:`unclaimed` | Parsing/Sema: https://github.com/llvm/llvm-project/pull/143442 | | | | | https://github.com/llvm/llvm-project/pull/149586 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| need_device_ptr modifier for adjust_args clause | :part:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Prescriptive num_threads | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/160659 | | | | | https://github.com/llvm/llvm-project/pull/146403 | | | | | https://github.com/llvm/llvm-project/pull/146404 | @@ -631,7 +633,9 @@ implementation. | | | | RT: @abhinavgaba (https://github.com/llvm/llvm-project/pull/149036, | | | | | https://github.com/llvm/llvm-project/pull/158370) | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ - +| need_device_ptr modifier for adjust_args clause | :part:`partial` | :none:`unclaimed` | Clang Parsing/Sema: https://github.com/llvm/llvm-project/pull/168905 | +| | | | https://github.com/llvm/llvm-project/pull/169558 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ OpenMP Extensions ================= diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a03f9e1cf19ca..5d10db17423ef 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -84,6 +84,8 @@ Potentially Breaking Changes - Downstream projects that previously linked only against ``clangDriver`` may now (also) need to link against the new ``clangOptions`` library, since options-related code has been moved out of the Driver into a separate library. +- The ``clangFrontend`` library no longer depends on ``clangDriver``, which may + break downstream projects that relied on this transitive dependency. C/C++ Language Potentially Breaking Changes ------------------------------------------- @@ -1010,6 +1012,9 @@ OpenMP Support - Updated parsing and semantic analysis support for ``nowait`` clause to accept optional argument in OpenMP >= 60. - Added support for ``default`` clause on ``target`` directive. +- Added parsing and semantic analysis support for ``need_device_ptr`` modifier + to accept an optional fallback argument (``fb_nullify`` or ``fb_preserve``) + with OpenMP >= 61. Improvements ^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DebugOptions.def b/clang/include/clang/Basic/DebugOptions.def index 136ac84e33c1c..fa4e4f1bb590e 100644 --- a/clang/include/clang/Basic/DebugOptions.def +++ b/clang/include/clang/Basic/DebugOptions.def @@ -69,6 +69,9 @@ DEBUGOPT(DebugKeyInstructions, 1, 0, Benign) DEBUGOPT(DebugColumnInfo, 1, 0, Compatible) ///< Whether or not to use column information ///< in debug info. +/// Whether or not to include call site information in debug info. +DEBUGOPT(DebugCallSiteInfo, 1, 1, Benign) + DEBUGOPT(DebugTypeExtRefs, 1, 0, Compatible) ///< Whether or not debug info should contain ///< external references to a PCH or module. diff --git a/clang/include/clang/Driver/CommonArgs.h b/clang/include/clang/Driver/CommonArgs.h index 56c2a65ce8c7b..464a4b335b620 100644 --- a/clang/include/clang/Driver/CommonArgs.h +++ b/clang/include/clang/Driver/CommonArgs.h @@ -312,16 +312,6 @@ void handleVectorizeLoopsArgs(const llvm::opt::ArgList &Args, void handleVectorizeSLPArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); -// Parse -mprefer-vector-width=. Return the Value string if well-formed. -// Otherwise, return an empty string and issue a diagnosic message if needed. -StringRef parseMPreferVectorWidthOption(clang::DiagnosticsEngine &Diags, - const llvm::opt::ArgList &Args); - -// Parse -mrecip. Return the Value string if well-formed. -// Otherwise, return an empty string and issue a diagnosic message if needed. -StringRef parseMRecipOption(clang::DiagnosticsEngine &Diags, - const llvm::opt::ArgList &Args); - // Convert ComplexRangeKind to a string that can be passed as a frontend option. std::string complexRangeKindToStr(LangOptions::ComplexRangeKind Range); diff --git a/clang/include/clang/Driver/CreateASTUnitFromArgs.h b/clang/include/clang/Driver/CreateASTUnitFromArgs.h new file mode 100644 index 0000000000000..30575cc04ca7c --- /dev/null +++ b/clang/include/clang/Driver/CreateASTUnitFromArgs.h @@ -0,0 +1,80 @@ +//===-- CreateInvocationFromArgs.h - Create an ASTUnit from Args-*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility for creating an ASTUnit from a vector of command line arguments. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_DRIVER_CREATEASTUNITFROMARGS_H +#define LLVM_CLANG_DRIVER_CREATEASTUNITFROMARGS_H + +#include "clang/Frontend/ASTUnit.h" + +namespace clang { + +/// Create an ASTUnit from a vector of command line arguments, which must +/// specify exactly one source file. +/// +/// \param ArgBegin - The beginning of the argument vector. +/// +/// \param ArgEnd - The end of the argument vector. +/// +/// \param PCHContainerOps - The PCHContainerOperations to use for loading and +/// creating modules. +/// +/// \param Diags - The diagnostics engine to use for reporting errors; its +/// lifetime is expected to extend past that of the returned ASTUnit. +/// +/// \param ResourceFilesPath - The path to the compiler resource files. +/// +/// \param StorePreamblesInMemory - Whether to store PCH in memory. If false, +/// PCH are stored in temporary files. +/// +/// \param PreambleStoragePath - The path to a directory, in which to create +/// temporary PCH files. If empty, the default system temporary directory is +/// used. This parameter is ignored if \p StorePreamblesInMemory is true. +/// +/// \param ModuleFormat - If provided, uses the specific module format. +/// +/// \param ErrAST - If non-null and parsing failed without any AST to return +/// (e.g. because the PCH could not be loaded), this accepts the ASTUnit +/// mainly to allow the caller to see the diagnostics. +/// +/// \param VFS - A llvm::vfs::FileSystem to be used for all file accesses. +/// Note that preamble is saved to a temporary directory on a RealFileSystem, +/// so in order for it to be loaded correctly, VFS should have access to +/// it(i.e., be an overlay over RealFileSystem). RealFileSystem will be used +/// if \p VFS is nullptr. +/// +// FIXME: Move OnlyLocalDecls, UseBumpAllocator to setters on the ASTUnit, we +// shouldn't need to specify them at construction time. +std::unique_ptr CreateASTUnitFromCommandLine( + const char **ArgBegin, const char **ArgEnd, + std::shared_ptr PCHContainerOps, + std::shared_ptr DiagOpts, + IntrusiveRefCntPtr Diags, StringRef ResourceFilesPath, + bool StorePreamblesInMemory = false, + StringRef PreambleStoragePath = StringRef(), bool OnlyLocalDecls = false, + CaptureDiagsKind CaptureDiagnostics = CaptureDiagsKind::None, + ArrayRef RemappedFiles = {}, + bool RemappedFilesKeepOriginalName = true, + unsigned PrecompilePreambleAfterNParses = 0, + TranslationUnitKind TUKind = TU_Complete, + bool CacheCodeCompletionResults = false, + bool IncludeBriefCommentsInCodeCompletion = false, + bool AllowPCHWithCompilerErrors = false, + SkipFunctionBodiesScope SkipFunctionBodies = SkipFunctionBodiesScope::None, + bool SingleFileParse = false, bool UserFilesAreVolatile = false, + bool ForSerialization = false, bool RetainExcludedConditionalBlocks = false, + std::optional ModuleFormat = std::nullopt, + std::unique_ptr *ErrAST = nullptr, + IntrusiveRefCntPtr VFS = nullptr); + +} // namespace clang + +#endif // LLVM_CLANG_DRIVER_CREATEASTUNITFROMARGS_H diff --git a/clang/include/clang/Driver/CreateInvocationFromArgs.h b/clang/include/clang/Driver/CreateInvocationFromArgs.h new file mode 100644 index 0000000000000..0e0f67373ce87 --- /dev/null +++ b/clang/include/clang/Driver/CreateInvocationFromArgs.h @@ -0,0 +1,76 @@ +//===--- CreateInvocationFromArgs.h - CompilerInvocation from Args --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility for creating a CompilerInvocation from command-line arguments, for +// tools to use in preparation to parse a file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_DRIVER_CREATEINVOCATIONFROMARGS_H +#define LLVM_CLANG_DRIVER_CREATEINVOCATIONFROMARGS_H + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LLVM.h" +#include "llvm/Support/VirtualFileSystem.h" +#include +#include +#include + +namespace clang { + +class CompilerInvocation; +class DiagnosticsEngine; + +/// Optional inputs to createInvocation. +struct CreateInvocationOptions { + /// Receives diagnostics encountered while parsing command-line flags. + /// If not provided, these are printed to stderr. + IntrusiveRefCntPtr Diags = nullptr; + /// Used e.g. to probe for system headers locations. + /// If not provided, the real filesystem is used. + /// FIXME: the driver does perform some non-virtualized IO. + IntrusiveRefCntPtr VFS = nullptr; + /// Whether to attempt to produce a non-null (possibly incorrect) invocation + /// if any errors were encountered. + /// By default, always return null on errors. + bool RecoverOnError = false; + /// Allow the driver to probe the filesystem for PCH files. + /// This is used to replace -include with -include-pch in the cc1 args. + /// FIXME: ProbePrecompiled=true is a poor, historical default. + /// It misbehaves if the PCH file is from GCC, has the wrong version, etc. + bool ProbePrecompiled = false; + /// If set, the target is populated with the cc1 args produced by the driver. + /// This may be populated even if createInvocation returns nullptr. + std::vector *CC1Args = nullptr; +}; + +/// Interpret clang arguments in preparation to parse a file. +/// +/// This simulates a number of steps Clang takes when its driver is invoked: +/// - choosing actions (e.g compile + link) to run +/// - probing the system for settings like standard library locations +/// - spawning a cc1 subprocess to compile code, with more explicit arguments +/// - in the cc1 process, assembling those arguments into a CompilerInvocation +/// which is used to configure the parser +/// +/// This simulation is lossy, e.g. in some situations one driver run would +/// result in multiple parses. (Multi-arch, CUDA, ...). +/// This function tries to select a reasonable invocation that tools should use. +/// +/// Args[0] should be the driver name, such as "clang" or "/usr/bin/g++". +/// Absolute path is preferred - this affects searching for system headers. +/// +/// May return nullptr if an invocation could not be determined. +/// See CreateInvocationOptions::RecoverOnError to try harder! +std::unique_ptr +createInvocation(ArrayRef Args, + CreateInvocationOptions Opts = {}); + +} // namespace clang + +#endif // LLVM_CLANG_DRIVER_CREATEINVOCATIONFROMARGS_H diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index ed0048a507d71..f13a0dd439f22 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -412,10 +412,6 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns - /// the path to clang's resource directory. - static std::string GetResourcesPath(StringRef BinaryPath); - Driver(StringRef ClangExecutable, StringRef TargetTriple, DiagnosticsEngine &Diags, std::string Title = "clang LLVM compiler", IntrusiveRefCntPtr VFS = nullptr); diff --git a/clang/include/clang/Frontend/ASTUnit.h b/clang/include/clang/Frontend/ASTUnit.h index e585933a5c8be..341460e1962cb 100644 --- a/clang/include/clang/Frontend/ASTUnit.h +++ b/clang/include/clang/Frontend/ASTUnit.h @@ -23,11 +23,13 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetOptions.h" #include "clang/Frontend/PrecompiledPreamble.h" +#include "clang/Frontend/StandaloneDiagnostic.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/PreprocessingRecord.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "clang/Serialization/ASTBitCodes.h" +#include "clang/Serialization/ASTWriter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" @@ -36,6 +38,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Bitstream/BitstreamWriter.h" #include #include #include @@ -88,25 +91,6 @@ enum class CaptureDiagsKind { None, All, AllWithoutNonErrorsFromIncludes }; /// Utility class for loading a ASTContext from an AST file. class ASTUnit { -public: - struct StandaloneFixIt { - std::pair RemoveRange; - std::pair InsertFromRange; - std::string CodeToInsert; - bool BeforePreviousInsertions; - }; - - struct StandaloneDiagnostic { - unsigned ID; - DiagnosticsEngine::Level Level; - std::string Message; - std::string Filename; - unsigned LocOffset; - std::vector> Ranges; - std::vector FixIts; - }; - -private: std::unique_ptr LangOpts; std::unique_ptr CodeGenOpts; // FIXME: The documentation on \c LoadFrom* member functions states that the @@ -129,7 +113,15 @@ class ASTUnit { bool HadModuleLoaderFatalFailure = false; bool StorePreamblesInMemory = false; - struct ASTWriterData; + /// Utility struct for managing ASTWriter and its associated data streams. + struct ASTWriterData { + SmallString<128> Buffer; + llvm::BitstreamWriter Stream; + ASTWriter Writer; + + ASTWriterData(ModuleCache &ModCache, const CodeGenOptions &CGOpts) + : Stream(Buffer), Writer(Stream, Buffer, ModCache, CGOpts, {}) {} + }; std::unique_ptr WriterData; FileSystemOptions FileSystemOpts; @@ -271,11 +263,6 @@ class ASTUnit { static void ConfigureDiags(IntrusiveRefCntPtr Diags, ASTUnit &AST, CaptureDiagsKind CaptureDiagnostics); - void - TranslateStoredDiagnostics(FileManager &FileMgr, SourceManager &SrcMan, - const SmallVectorImpl &Diags, - SmallVectorImpl &Out); - void clearFileLevelDecls(); public: @@ -834,65 +821,24 @@ class ASTUnit { bool IncludeBriefCommentsInCodeCompletion = false, bool UserFilesAreVolatile = false); - /// LoadFromCommandLine - Create an ASTUnit from a vector of command line - /// arguments, which must specify exactly one source file. - /// - /// \param ArgBegin - The beginning of the argument vector. - /// - /// \param ArgEnd - The end of the argument vector. - /// - /// \param PCHContainerOps - The PCHContainerOperations to use for loading and - /// creating modules. - /// - /// \param Diags - The diagnostics engine to use for reporting errors; its - /// lifetime is expected to extend past that of the returned ASTUnit. - /// - /// \param ResourceFilesPath - The path to the compiler resource files. - /// - /// \param StorePreamblesInMemory - Whether to store PCH in memory. If false, - /// PCH are stored in temporary files. - /// - /// \param PreambleStoragePath - The path to a directory, in which to create - /// temporary PCH files. If empty, the default system temporary directory is - /// used. This parameter is ignored if \p StorePreamblesInMemory is true. - /// - /// \param ModuleFormat - If provided, uses the specific module format. - /// - /// \param ErrAST - If non-null and parsing failed without any AST to return - /// (e.g. because the PCH could not be loaded), this accepts the ASTUnit - /// mainly to allow the caller to see the diagnostics. - /// - /// \param VFS - A llvm::vfs::FileSystem to be used for all file accesses. - /// Note that preamble is saved to a temporary directory on a RealFileSystem, - /// so in order for it to be loaded correctly, VFS should have access to - /// it(i.e., be an overlay over RealFileSystem). RealFileSystem will be used - /// if \p VFS is nullptr. - /// - // FIXME: Move OnlyLocalDecls, UseBumpAllocator to setters on the ASTUnit, we - // shouldn't need to specify them at construction time. - static std::unique_ptr LoadFromCommandLine( + friend std::unique_ptr CreateASTUnitFromCommandLine( const char **ArgBegin, const char **ArgEnd, std::shared_ptr PCHContainerOps, std::shared_ptr DiagOpts, IntrusiveRefCntPtr Diags, StringRef ResourceFilesPath, - bool StorePreamblesInMemory = false, - StringRef PreambleStoragePath = StringRef(), bool OnlyLocalDecls = false, - CaptureDiagsKind CaptureDiagnostics = CaptureDiagsKind::None, - ArrayRef RemappedFiles = {}, - bool RemappedFilesKeepOriginalName = true, - unsigned PrecompilePreambleAfterNParses = 0, - TranslationUnitKind TUKind = TU_Complete, - bool CacheCodeCompletionResults = false, - bool IncludeBriefCommentsInCodeCompletion = false, - bool AllowPCHWithCompilerErrors = false, - SkipFunctionBodiesScope SkipFunctionBodies = - SkipFunctionBodiesScope::None, - bool SingleFileParse = false, bool UserFilesAreVolatile = false, - bool ForSerialization = false, - bool RetainExcludedConditionalBlocks = false, - std::optional ModuleFormat = std::nullopt, - std::unique_ptr *ErrAST = nullptr, - IntrusiveRefCntPtr VFS = nullptr); + bool StorePreamblesInMemory, StringRef PreambleStoragePath, + bool OnlyLocalDecls, CaptureDiagsKind CaptureDiagnostics, + ArrayRef RemappedFiles, + bool RemappedFilesKeepOriginalName, + unsigned PrecompilePreambleAfterNParses, TranslationUnitKind TUKind, + bool CacheCodeCompletionResults, + bool IncludeBriefCommentsInCodeCompletion, + bool AllowPCHWithCompilerErrors, + SkipFunctionBodiesScope SkipFunctionBodies, bool SingleFileParse, + bool UserFilesAreVolatile, bool ForSerialization, + bool RetainExcludedConditionalBlocks, + std::optional ModuleFormat, std::unique_ptr *ErrAST, + IntrusiveRefCntPtr VFS); /// Reparse the source files using the same command-line options that /// were originally used to produce this translation unit. @@ -963,6 +909,44 @@ class ASTUnit { bool serialize(raw_ostream &OS); }; +/// Diagnostic consumer that saves each diagnostic it is given. +class FilterAndStoreDiagnosticConsumer : public DiagnosticConsumer { + SmallVectorImpl *StoredDiags; + SmallVectorImpl *StandaloneDiags; + bool CaptureNonErrorsFromIncludes = true; + const LangOptions *LangOpts = nullptr; + SourceManager *SourceMgr = nullptr; + +public: + FilterAndStoreDiagnosticConsumer( + SmallVectorImpl *StoredDiags, + SmallVectorImpl *StandaloneDiags, + bool CaptureNonErrorsFromIncludes); + + void BeginSourceFile(const LangOptions &LangOpts, + const Preprocessor *PP = nullptr) override; + + void HandleDiagnostic(DiagnosticsEngine::Level Level, + const Diagnostic &Info) override; +}; + +/// RAII object that optionally captures and filters diagnostics, if +/// there is no diagnostic client to capture them already. +class CaptureDroppedDiagnostics { + DiagnosticsEngine &Diags; + FilterAndStoreDiagnosticConsumer Client; + DiagnosticConsumer *PreviousClient = nullptr; + std::unique_ptr OwningPreviousClient; + +public: + CaptureDroppedDiagnostics( + CaptureDiagsKind CaptureDiagnostics, DiagnosticsEngine &Diags, + SmallVectorImpl *StoredDiags, + SmallVectorImpl *StandaloneDiags); + + ~CaptureDroppedDiagnostics(); +}; + } // namespace clang #endif // LLVM_CLANG_FRONTEND_ASTUNIT_H diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index b19a6e1a8acc3..4977ddb307d21 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -299,16 +299,6 @@ class CompilerInvocation : public CompilerInvocationBase { DiagnosticsEngine &Diags, const char *Argv0 = nullptr); - /// Get the directory where the compiler headers - /// reside, relative to the compiler binary (found by the passed in - /// arguments). - /// - /// \param Argv0 - The program path (from argv[0]), for finding the builtin - /// compiler path. - /// \param MainAddr - The address of main (or some other function in the main - /// executable), for finding the builtin compiler path. - static std::string GetResourcesPath(const char *Argv0, void *MainAddr); - /// Populate \p Opts with the default set of pointer authentication-related /// options given \p LangOpts and \p Triple. /// diff --git a/clang/include/clang/Frontend/StandaloneDiagnostic.h b/clang/include/clang/Frontend/StandaloneDiagnostic.h new file mode 100644 index 0000000000000..c23d5f95e0c2f --- /dev/null +++ b/clang/include/clang/Frontend/StandaloneDiagnostic.h @@ -0,0 +1,82 @@ +//===--- StandaloneDiagnostic.h - Serializable Diagnostic -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A serializable diagnostic representation to retain diagnostics after their +// SourceManager has been destroyed. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FRONTEND_STANDALONEDIAGNOSTICS_H +#define LLVM_CLANG_FRONTEND_STANDALONEDIAGNOSTICS_H + +#include "clang/Basic/DiagnosticIDs.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Specifiers.h" +#include "llvm/ADT/StringExtras.h" +#include +#include +#include + +namespace clang { + +/// Represents a StoredDiagnostic in a form that can be retained until after its +/// SourceManager has been destroyed. +/// +/// Source locations are stored as a combination of filename and offsets into +/// that file. +/// To report the diagnostic, it must first be translated back into a +/// StoredDiagnostic with a new associated SourceManager. +struct StandaloneDiagnostic { + /// Represents a CharSourceRange within a StandaloneDiagnostic. + struct SourceOffsetRange { + SourceOffsetRange(CharSourceRange Range, const SourceManager &SrcMgr, + const LangOptions &LangOpts); + + unsigned Begin = 0; + unsigned End = 0; + }; + + /// Represents a FixItHint within a StandaloneDiagnostic. + struct StandaloneFixIt { + StandaloneFixIt(const SourceManager &SrcMgr, const LangOptions &LangOpts, + const FixItHint &FixIt); + + SourceOffsetRange RemoveRange; + SourceOffsetRange InsertFromRange; + std::string CodeToInsert; + bool BeforePreviousInsertions; + }; + + StandaloneDiagnostic(const LangOptions &LangOpts, + const StoredDiagnostic &InDiag); + + DiagnosticsEngine::Level Level; + SrcMgr::CharacteristicKind FileKind; + unsigned ID = 0; + unsigned FileOffset = 0; + std::string Message; + std::string Filename; + std::vector Ranges; + std::vector FixIts; +}; + +/// Translates \c StandaloneDiag into a StoredDiagnostic, associating it with +/// the provided FileManager and SourceManager. +/// +/// This allows the diagnostic to be emitted using the diagnostics engine, since +/// StandaloneDiagnostics themselfs cannot be emitted directly. +StoredDiagnostic +translateStandaloneDiag(FileManager &FileMgr, SourceManager &SrcMgr, + const StandaloneDiagnostic &StandaloneDiag, + llvm::StringMap &SrcLocCache); + +} // namespace clang + +#endif // STANDALONEDIAGNOSTICS diff --git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h index ed2703c76f18d..1c561b47b5c47 100644 --- a/clang/include/clang/Frontend/Utils.h +++ b/clang/include/clang/Frontend/Utils.h @@ -192,51 +192,6 @@ IntrusiveRefCntPtr createChainedIncludesSource(CompilerInstance &CI, IntrusiveRefCntPtr &OutReader); -/// Optional inputs to createInvocation. -struct CreateInvocationOptions { - /// Receives diagnostics encountered while parsing command-line flags. - /// If not provided, these are printed to stderr. - IntrusiveRefCntPtr Diags = nullptr; - /// Used e.g. to probe for system headers locations. - /// If not provided, the real filesystem is used. - /// FIXME: the driver does perform some non-virtualized IO. - IntrusiveRefCntPtr VFS = nullptr; - /// Whether to attempt to produce a non-null (possibly incorrect) invocation - /// if any errors were encountered. - /// By default, always return null on errors. - bool RecoverOnError = false; - /// Allow the driver to probe the filesystem for PCH files. - /// This is used to replace -include with -include-pch in the cc1 args. - /// FIXME: ProbePrecompiled=true is a poor, historical default. - /// It misbehaves if the PCH file is from GCC, has the wrong version, etc. - bool ProbePrecompiled = false; - /// If set, the target is populated with the cc1 args produced by the driver. - /// This may be populated even if createInvocation returns nullptr. - std::vector *CC1Args = nullptr; -}; - -/// Interpret clang arguments in preparation to parse a file. -/// -/// This simulates a number of steps Clang takes when its driver is invoked: -/// - choosing actions (e.g compile + link) to run -/// - probing the system for settings like standard library locations -/// - spawning a cc1 subprocess to compile code, with more explicit arguments -/// - in the cc1 process, assembling those arguments into a CompilerInvocation -/// which is used to configure the parser -/// -/// This simulation is lossy, e.g. in some situations one driver run would -/// result in multiple parses. (Multi-arch, CUDA, ...). -/// This function tries to select a reasonable invocation that tools should use. -/// -/// Args[0] should be the driver name, such as "clang" or "/usr/bin/g++". -/// Absolute path is preferred - this affects searching for system headers. -/// -/// May return nullptr if an invocation could not be determined. -/// See CreateInvocationOptions::ShouldRecoverOnErrors to try harder! -std::unique_ptr -createInvocation(ArrayRef Args, - CreateInvocationOptions Opts = {}); - } // namespace clang #endif // LLVM_CLANG_FRONTEND_UTILS_H diff --git a/clang/include/clang/Options/OptionUtils.h b/clang/include/clang/Options/OptionUtils.h index 83c48bd7d6843..02c9c27554db1 100644 --- a/clang/include/clang/Options/OptionUtils.h +++ b/clang/include/clang/Options/OptionUtils.h @@ -28,6 +28,7 @@ class ArgList; } // namespace llvm namespace clang { + /// Return the value of the last argument as an integer, or a default. If Diags /// is non-null, emits an error if the argument is given, but non-integral. int getLastArgIntValue(const llvm::opt::ArgList &Args, @@ -53,6 +54,29 @@ inline uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args, return getLastArgUInt64Value(Args, Id, Default, &Diags, Base); } +// Parse -mprefer-vector-width=. Return the Value string if well-formed. +// Otherwise, return an empty string and issue a diagnosic message if needed. +StringRef parseMPreferVectorWidthOption(clang::DiagnosticsEngine &Diags, + const llvm::opt::ArgList &Args); + +// Parse -mrecip. Return the Value string if well-formed. +// Otherwise, return an empty string and issue a diagnosic message if needed. +StringRef parseMRecipOption(clang::DiagnosticsEngine &Diags, + const llvm::opt::ArgList &Args); + +/// Get the directory where the compiler headers reside, relative to the +/// compiler binary path \p BinaryPath. +std::string GetResourcesPath(StringRef BinaryPath); + +/// Get the directory where the compiler headers reside, relative to the +/// compiler binary path (found by the passed in arguments). +/// +/// \param Argv0 The program path (from argv[0]), for finding the builtin +/// compiler path. +/// \param MainAddr The address of main (or some other function in the main +/// executable), for finding the builtin compiler path. +std::string GetResourcesPath(const char *Argv0, void *MainAddr); + } // namespace clang #endif // LLVM_CLANG_OPTIONS_OPTIONUTILS_H diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 67d17674ac794..28c609bb8524d 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -1429,6 +1429,16 @@ def fhip_emit_relocatable : Flag<["-"], "fhip-emit-relocatable">, HelpText<"Compile HIP source to relocatable">; def fno_hip_emit_relocatable : Flag<["-"], "fno-hip-emit-relocatable">, HelpText<"Do not override toolchain to compile HIP source to relocatable">; +def use_spirv_backend + : Flag<["-"], "use-spirv-backend">, + Group, + Flags<[HelpHidden]>, + HelpText<"Use the SPIRV backend for compilation ">; +def no_use_spirv_backend + : Flag<["-"], "no-use-spirv-backend">, + Group, + Flags<[HelpHidden]>, + HelpText<"Do not use the SPIRV backend for compilation ">; } // Clang specific/exclusive options for OpenACC. @@ -4954,6 +4964,14 @@ defm column_info : BoolOption<"g", "column-info", NegFlag, PosFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>, Group; +defm call_site_info : BoolOption<"g", "call-site-info", + CodeGenOpts<"DebugCallSiteInfo">, + DefaultTrue, + PosFlag, + NegFlag, + BothFlags<[], [ClangOption, CC1Option], " call site debug info">>, + Group, + DocBrief<[{Call site debug info enables various debugger features including detecting tail calls for display in backtraces and displaying some source variable values that reference the call entry value.}]>; def gsplit_dwarf : Flag<["-"], "gsplit-dwarf">, Group, Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>; def gsplit_dwarf_EQ : Joined<["-"], "gsplit-dwarf=">, Group, diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 48c082d89de18..4c94db5ddd457 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -644,6 +644,9 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest, case AtomicExpr::AO__scoped_atomic_nand_fetch: case AtomicExpr::AO__scoped_atomic_fetch_nand: + + case AtomicExpr::AO__scoped_atomic_uinc_wrap: + case AtomicExpr::AO__scoped_atomic_udec_wrap: cgf.cgm.errorNYI(expr->getSourceRange(), "emitAtomicOp: expr op NYI"); return; } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 0a6051fe709f8..5d7ab98e0f87b 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -7094,7 +7094,8 @@ llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { // when there's a possibility of debugging backtraces. if (CGM.getCodeGenOpts().OptimizationLevel == 0 || DebugKind == llvm::codegenoptions::NoDebugInfo || - DebugKind == llvm::codegenoptions::LocTrackingOnly) + DebugKind == llvm::codegenoptions::LocTrackingOnly || + !CGM.getCodeGenOpts().DebugCallSiteInfo) return llvm::DINode::FlagZero; // Call site-related attributes are available in DWARF v5. Some debuggers, diff --git a/clang/lib/CrossTU/CMakeLists.txt b/clang/lib/CrossTU/CMakeLists.txt index 3349fc283925d..eef7a892701fb 100644 --- a/clang/lib/CrossTU/CMakeLists.txt +++ b/clang/lib/CrossTU/CMakeLists.txt @@ -9,6 +9,7 @@ add_clang_library(clangCrossTU LINK_LIBS clangAST clangBasic + clangDriver clangFrontend clangIndex ) diff --git a/clang/lib/CrossTU/CrossTranslationUnit.cpp b/clang/lib/CrossTU/CrossTranslationUnit.cpp index 0287845a741ed..a3fc2cf6bfb3c 100644 --- a/clang/lib/CrossTU/CrossTranslationUnit.cpp +++ b/clang/lib/CrossTU/CrossTranslationUnit.cpp @@ -16,6 +16,7 @@ #include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/TargetInfo.h" #include "clang/CrossTU/CrossTUDiagnostic.h" +#include "clang/Driver/CreateASTUnitFromArgs.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/TextDiagnosticPrinter.h" @@ -619,7 +620,7 @@ CrossTranslationUnitContext::ASTLoader::loadFromSource( auto Diags = llvm::makeIntrusiveRefCnt(DiagID, *DiagOpts, DiagClient); - return ASTUnit::LoadFromCommandLine( + return CreateASTUnitFromCommandLine( CommandLineArgs.begin(), (CommandLineArgs.end()), CI.getPCHContainerOperations(), DiagOpts, Diags, CI.getHeaderSearchOpts().ResourceDir); diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index b68e26f4d3847..7a74b444eb8df 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -18,6 +18,8 @@ endif() add_clang_library(clangDriver Action.cpp Compilation.cpp + CreateASTUnitFromArgs.cpp + CreateInvocationFromArgs.cpp Distro.cpp Driver.cpp Job.cpp @@ -97,6 +99,8 @@ add_clang_library(clangDriver LINK_LIBS clangBasic + clangFrontend + clangSerialization clangLex clangOptions ${system_libs} diff --git a/clang/lib/Driver/CreateASTUnitFromArgs.cpp b/clang/lib/Driver/CreateASTUnitFromArgs.cpp new file mode 100644 index 0000000000000..ea31a8ed07c5f --- /dev/null +++ b/clang/lib/Driver/CreateASTUnitFromArgs.cpp @@ -0,0 +1,166 @@ +//===--- CreateASTUnitFromArgs.h - Create an ASTUnit from Args ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility for creating an ASTUnit from a vector of command line arguments. +// +//===----------------------------------------------------------------------===// + +#include "clang/Driver/CreateASTUnitFromArgs.h" +#include "clang/Driver/CreateInvocationFromArgs.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "clang/Serialization/ModuleCache.h" +#include "llvm/Support/CrashRecoveryContext.h" + +using namespace clang; + +/// Create an ASTUnit from a vector of command line arguments, which must +/// specify exactly one source file. +/// +/// \param ArgBegin - The beginning of the argument vector. +/// +/// \param ArgEnd - The end of the argument vector. +/// +/// \param PCHContainerOps - The PCHContainerOperations to use for loading and +/// creating modules. +/// +/// \param Diags - The diagnostics engine to use for reporting errors; its +/// lifetime is expected to extend past that of the returned ASTUnit. +/// +/// \param ResourceFilesPath - The path to the compiler resource files. +/// +/// \param StorePreamblesInMemory - Whether to store PCH in memory. If false, +/// PCH are stored in temporary files. +/// +/// \param PreambleStoragePath - The path to a directory, in which to create +/// temporary PCH files. If empty, the default system temporary directory is +/// used. This parameter is ignored if \p StorePreamblesInMemory is true. +/// +/// \param ModuleFormat - If provided, uses the specific module format. +/// +/// \param ErrAST - If non-null and parsing failed without any AST to return +/// (e.g. because the PCH could not be loaded), this accepts the ASTUnit +/// mainly to allow the caller to see the diagnostics. +/// +/// \param VFS - A llvm::vfs::FileSystem to be used for all file accesses. +/// Note that preamble is saved to a temporary directory on a RealFileSystem, +/// so in order for it to be loaded correctly, VFS should have access to +/// it(i.e., be an overlay over RealFileSystem). RealFileSystem will be used +/// if \p VFS is nullptr. +/// +// FIXME: Move OnlyLocalDecls, UseBumpAllocator to setters on the ASTUnit, we +// shouldn't need to specify them at construction time. +std::unique_ptr clang::CreateASTUnitFromCommandLine( + const char **ArgBegin, const char **ArgEnd, + std::shared_ptr PCHContainerOps, + std::shared_ptr DiagOpts, + IntrusiveRefCntPtr Diags, StringRef ResourceFilesPath, + bool StorePreamblesInMemory, StringRef PreambleStoragePath, + bool OnlyLocalDecls, CaptureDiagsKind CaptureDiagnostics, + ArrayRef RemappedFiles, + bool RemappedFilesKeepOriginalName, unsigned PrecompilePreambleAfterNParses, + TranslationUnitKind TUKind, bool CacheCodeCompletionResults, + bool IncludeBriefCommentsInCodeCompletion, bool AllowPCHWithCompilerErrors, + SkipFunctionBodiesScope SkipFunctionBodies, bool SingleFileParse, + bool UserFilesAreVolatile, bool ForSerialization, + bool RetainExcludedConditionalBlocks, std::optional ModuleFormat, + std::unique_ptr *ErrAST, + IntrusiveRefCntPtr VFS) { + assert(Diags.get() && "no DiagnosticsEngine was provided"); + + // If no VFS was provided, create one that tracks the physical file system. + // If '-working-directory' was passed as an argument, 'createInvocation' will + // set this as the current working directory of the VFS. + if (!VFS) + VFS = llvm::vfs::createPhysicalFileSystem(); + + SmallVector StoredDiagnostics; + + std::shared_ptr CI; + + { + CaptureDroppedDiagnostics Capture(CaptureDiagnostics, *Diags, + &StoredDiagnostics, nullptr); + + CreateInvocationOptions CIOpts; + CIOpts.VFS = VFS; + CIOpts.Diags = Diags; + CIOpts.ProbePrecompiled = true; // FIXME: historical default. Needed? + CI = createInvocation(llvm::ArrayRef(ArgBegin, ArgEnd), std::move(CIOpts)); + if (!CI) + return nullptr; + } + + // Override any files that need remapping + for (const auto &RemappedFile : RemappedFiles) { + CI->getPreprocessorOpts().addRemappedFile(RemappedFile.first, + RemappedFile.second); + } + PreprocessorOptions &PPOpts = CI->getPreprocessorOpts(); + PPOpts.RemappedFilesKeepOriginalName = RemappedFilesKeepOriginalName; + PPOpts.AllowPCHWithCompilerErrors = AllowPCHWithCompilerErrors; + PPOpts.SingleFileParseMode = SingleFileParse; + PPOpts.RetainExcludedConditionalBlocks = RetainExcludedConditionalBlocks; + + // Override the resources path. + CI->getHeaderSearchOpts().ResourceDir = std::string(ResourceFilesPath); + + CI->getFrontendOpts().SkipFunctionBodies = + SkipFunctionBodies == SkipFunctionBodiesScope::PreambleAndMainFile; + + if (ModuleFormat) + CI->getHeaderSearchOpts().ModuleFormat = std::string(*ModuleFormat); + + // Create the AST unit. + std::unique_ptr AST; + AST.reset(new ASTUnit(false)); + AST->NumStoredDiagnosticsFromDriver = StoredDiagnostics.size(); + AST->StoredDiagnostics.swap(StoredDiagnostics); + ASTUnit::ConfigureDiags(Diags, *AST, CaptureDiagnostics); + AST->DiagOpts = DiagOpts; + AST->Diagnostics = Diags; + AST->FileSystemOpts = CI->getFileSystemOpts(); + AST->CodeGenOpts = std::make_unique(CI->getCodeGenOpts()); + VFS = createVFSFromCompilerInvocation(*CI, *Diags, VFS); + AST->FileMgr = + llvm::makeIntrusiveRefCnt(AST->FileSystemOpts, VFS); + AST->StorePreamblesInMemory = StorePreamblesInMemory; + AST->PreambleStoragePath = PreambleStoragePath; + AST->ModCache = createCrossProcessModuleCache(); + AST->OnlyLocalDecls = OnlyLocalDecls; + AST->CaptureDiagnostics = CaptureDiagnostics; + AST->TUKind = TUKind; + AST->ShouldCacheCodeCompletionResults = CacheCodeCompletionResults; + AST->IncludeBriefCommentsInCodeCompletion = + IncludeBriefCommentsInCodeCompletion; + AST->UserFilesAreVolatile = UserFilesAreVolatile; + AST->Invocation = CI; + AST->SkipFunctionBodies = SkipFunctionBodies; + if (ForSerialization) + AST->WriterData.reset( + new ASTUnit::ASTWriterData(*AST->ModCache, *AST->CodeGenOpts)); + // Zero out now to ease cleanup during crash recovery. + CI = nullptr; + Diags = nullptr; + + // Recover resources if we crash before exiting this method. + llvm::CrashRecoveryContextCleanupRegistrar ASTUnitCleanup(AST.get()); + + if (AST->LoadFromCompilerInvocation(std::move(PCHContainerOps), + PrecompilePreambleAfterNParses, VFS)) { + // Some error occurred, if caller wants to examine diagnostics, pass it the + // ASTUnit. + if (ErrAST) { + AST->StoredDiagnostics.swap(AST->FailedParseDiagnostics); + ErrAST->swap(AST); + } + return nullptr; + } + + return AST; +} diff --git a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp b/clang/lib/Driver/CreateInvocationFromArgs.cpp similarity index 93% rename from clang/lib/Frontend/CreateInvocationFromCommandLine.cpp rename to clang/lib/Driver/CreateInvocationFromArgs.cpp index e54e83151ad1e..516d61f1a1159 100644 --- a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp +++ b/clang/lib/Driver/CreateInvocationFromArgs.cpp @@ -1,4 +1,4 @@ -//===--- CreateInvocationFromCommandLine.cpp - CompilerInvocation from Args ==// +//===--- CreateInvocationFromArgs.h - CompilerInvocation from Args --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Action.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Tool.h" @@ -24,12 +24,13 @@ #include "llvm/Option/ArgList.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/TargetParser/Host.h" -using namespace clang; + using namespace llvm::opt; +namespace clang { + std::unique_ptr -clang::createInvocation(ArrayRef ArgList, - CreateInvocationOptions Opts) { +createInvocation(ArrayRef ArgList, CreateInvocationOptions Opts) { assert(!ArgList.empty()); std::optional LocalDiagOpts; IntrusiveRefCntPtr Diags; @@ -114,3 +115,5 @@ clang::createInvocation(ArrayRef ArgList, return nullptr; return CI; } + +} // namespace clang diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 2a6648b13e4e5..b2989061474f0 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -67,6 +67,7 @@ #include "clang/Driver/Types.h" #include "clang/Driver/Util.h" #include "clang/Lex/DependencyDirectivesScanner.h" +#include "clang/Options/OptionUtils.h" #include "clang/Options/Options.h" #include "clang/Options/OptionUtils.h" #include "llvm/ADT/ArrayRef.h" @@ -127,40 +128,6 @@ template static bool usesInput(const ArgList &Args, F &&Fn) { }); } -// static -std::string Driver::GetResourcesPath(StringRef BinaryPath) { - // Since the resource directory is embedded in the module hash, it's important - // that all places that need it call this function, so that they get the - // exact same string ("a/../b/" and "b/" get different hashes, for example). - - // Dir is bin/ or lib/, depending on where BinaryPath is. - StringRef Dir = llvm::sys::path::parent_path(BinaryPath); - SmallString<128> P(Dir); - - StringRef ConfiguredResourceDir(CLANG_RESOURCE_DIR); - if (!ConfiguredResourceDir.empty()) { - // FIXME: We should fix the behavior of llvm::sys::path::append so we don't - // need to check for absolute paths here. - if (llvm::sys::path::is_absolute(ConfiguredResourceDir)) - P = ConfiguredResourceDir; - else - llvm::sys::path::append(P, ConfiguredResourceDir); - } else { - // On Windows, libclang.dll is in bin/. - // On non-Windows, libclang.so/.dylib is in lib/. - // With a static-library build of libclang, LibClangPath will contain the - // path of the embedding binary, which for LLVM binaries will be in bin/. - // ../lib gets us to lib/ in both cases. - P = llvm::sys::path::parent_path(Dir); - // This search path is also created in the COFF driver of lld, so any - // changes here also needs to happen in lld/COFF/Driver.cpp - llvm::sys::path::append(P, CLANG_INSTALL_LIBDIR_BASENAME, "clang", - CLANG_VERSION_MAJOR_STRING); - } - - return std::string(P); -} - CUIDOptions::CUIDOptions(llvm::opt::DerivedArgList &Args, const Driver &D) : UseCUID(Kind::Hash) { if (Arg *A = Args.getLastArg(options::OPT_fuse_cuid_EQ)) { @@ -5043,15 +5010,24 @@ Action *Driver::BuildOffloadingActions(Compilation &C, // Compiling HIP in device-only non-RDC mode requires linking each action // individually. for (Action *&A : DeviceActions) { - // Special handling for the HIP SPIR-V toolchain because it doesn't use - // the SPIR-V backend yet doesn't report the output as an object. bool IsAMDGCNSPIRV = A->getOffloadingToolChain() && A->getOffloadingToolChain()->getTriple().getOS() == llvm::Triple::OSType::AMDHSA && A->getOffloadingToolChain()->getTriple().isSPIRV(); + bool UseSPIRVBackend = Args.hasFlag(options::OPT_use_spirv_backend, + options::OPT_no_use_spirv_backend, + /*Default=*/false); + + // Special handling for the HIP SPIR-V toolchain in device-only. + // The translator path has a linking step, whereas the SPIR-V backend path + // does not to avoid any external dependency such as spirv-link. The + // linking step is skipped for the SPIR-V backend path. + bool IsAMDGCNSPIRVWithBackend = IsAMDGCNSPIRV && UseSPIRVBackend; + if ((A->getType() != types::TY_Object && !IsAMDGCNSPIRV && A->getType() != types::TY_LTO_BC) || - HIPRelocatableObj || !HIPNoRDC || !offloadDeviceOnly()) + HIPRelocatableObj || !HIPNoRDC || !offloadDeviceOnly() || + (IsAMDGCNSPIRVWithBackend && offloadDeviceOnly())) continue; ActionList LinkerInput = {A}; A = C.MakeAction(LinkerInput, types::TY_Image); @@ -5279,12 +5255,28 @@ Action *Driver::ConstructPhaseAction( Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; return C.MakeAction(Input, Output); } + bool UseSPIRVBackend = Args.hasFlag(options::OPT_use_spirv_backend, + options::OPT_no_use_spirv_backend, + /*Default=*/false); + + auto OffloadingToolChain = Input->getOffloadingToolChain(); + // For AMD SPIRV, if offloadDeviceOnly(), we call the SPIRV backend unless + // LLVM bitcode was requested explicitly or RDC is set. If + // !offloadDeviceOnly, we emit LLVM bitcode, and clang-linker-wrapper will + // compile it to SPIRV. + bool UseSPIRVBackendForHipDeviceOnlyNoRDC = + TargetDeviceOffloadKind == Action::OFK_HIP && OffloadingToolChain && + OffloadingToolChain->getTriple().isSPIRV() && UseSPIRVBackend && + offloadDeviceOnly() && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false); + if (Args.hasArg(options::OPT_emit_llvm) || TargetDeviceOffloadKind == Action::OFK_SYCL || (((Input->getOffloadingToolChain() && Input->getOffloadingToolChain()->getTriple().isAMDGPU() && TargetDeviceOffloadKind != Action::OFK_None) || TargetDeviceOffloadKind == Action::OFK_HIP) && + !UseSPIRVBackendForHipDeviceOnlyNoRDC && ((Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) || (Args.hasFlag(options::OPT_offload_new_driver, @@ -5306,6 +5298,19 @@ Action *Driver::ConstructPhaseAction( : types::TY_LLVM_BC; return C.MakeAction(Input, Output); } + + // The SPIRV backend compilation path for HIP must avoid external + // dependencies. The default compilation path assembles and links its + // output, but the SPIRV assembler and linker are external tools. This code + // ensures the backend emits binary SPIRV directly to bypass those steps and + // avoid failures. Without -save-temps, the compiler may already skip + // assembling and linking. With -save-temps, these steps must be explicitly + // disabled, as done here. We also force skipping these steps regardless of + // -save-temps to avoid relying on optimizations (unless -S is set). + // The current HIP bundling expects the type to be types::TY_Image + if (UseSPIRVBackendForHipDeviceOnlyNoRDC && !Args.hasArg(options::OPT_S)) + return C.MakeAction(Input, types::TY_Image); + return C.MakeAction(Input, types::TY_PP_Asm); } case phases::Assemble: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 952b4fce80e5b..1d148f3e02692 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -33,6 +33,7 @@ #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/Types.h" #include "clang/Driver/XRayArgs.h" +#include "clang/Options/OptionUtils.h" #include "clang/Options/Options.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" @@ -4487,6 +4488,10 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T, DebuggerTuning != llvm::DebuggerKind::DBX))) CmdArgs.push_back("-gno-column-info"); + if (!Args.hasFlag(options::OPT_gcall_site_info, + options::OPT_gno_call_site_info, true)) + CmdArgs.push_back("-gno-call-site-info"); + // FIXME: Move backend command line options to the module. if (Args.hasFlag(options::OPT_gmodules, options::OPT_gno_modules, false)) { // If -gline-tables-only or -gline-directives-only is the last option it @@ -5136,6 +5141,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.ClaimAllArgs(options::OPT_femit_dwarf_unwind_EQ); } + bool IsAMDSPIRVForHIPDevice = + IsHIPDevice && getToolChain().getTriple().isSPIRV() && + getToolChain().getTriple().getVendor() == llvm::Triple::AMD; + if (isa(JA)) { assert(JA.getType() == types::TY_Plist && "Invalid output type."); CmdArgs.push_back("-analyze"); @@ -5236,6 +5245,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, rewriteKind = RK_Fragile; } else if (JA.getType() == types::TY_CIR) { CmdArgs.push_back("-emit-cir"); + } else if (JA.getType() == types::TY_Image && IsAMDSPIRVForHIPDevice) { + CmdArgs.push_back("-emit-obj"); } else { assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!"); } @@ -9277,7 +9288,9 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, OPT_fno_lto, OPT_flto, OPT_flto_partitions_EQ, - OPT_flto_EQ}; + OPT_flto_EQ, + OPT_use_spirv_backend}; + const llvm::DenseSet LinkerOptions{OPT_mllvm, OPT_Zlinker_input}; auto ShouldForwardForToolChain = [&](Arg *A, const ToolChain &TC) { // Don't forward -mllvm to toolchains that don't support LLVM. diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index d58735294aa62..ae373c60b47a1 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3587,169 +3587,6 @@ void tools::handleInterchangeLoopsArgs(const ArgList &Args, CmdArgs.push_back("-floop-interchange"); } -// Parse -mprefer-vector-width=. Return the Value string if well-formed. -// Otherwise, return an empty string and issue a diagnosic message if needed. -StringRef tools::parseMPreferVectorWidthOption(clang::DiagnosticsEngine &Diags, - const llvm::opt::ArgList &Args) { - Arg *A = Args.getLastArg(options::OPT_mprefer_vector_width_EQ); - if (!A) - return ""; - - StringRef Value = A->getValue(); - unsigned Width LLVM_ATTRIBUTE_UNINITIALIZED; - - // Only "none" and Integer values are accepted by - // -mprefer-vector-width=. - if (Value != "none" && Value.getAsInteger(10, Width)) { - Diags.Report(clang::diag::err_drv_invalid_value) - << A->getOption().getName() << Value; - return ""; - } - - return Value; -} - -// This is a helper function for validating the optional refinement step -// parameter in reciprocal argument strings. Return false if there is an error -// parsing the refinement step. Otherwise, return true and set the Position -// of the refinement step in the input string. -static bool getRefinementStep(StringRef In, clang::DiagnosticsEngine &Diags, - const Arg &A, size_t &Position) { - const char RefinementStepToken = ':'; - Position = In.find(RefinementStepToken); - if (Position != StringRef::npos) { - StringRef Option = A.getOption().getName(); - StringRef RefStep = In.substr(Position + 1); - // Allow exactly one numeric character for the additional refinement - // step parameter. This is reasonable for all currently-supported - // operations and architectures because we would expect that a larger value - // of refinement steps would cause the estimate "optimization" to - // under-perform the native operation. Also, if the estimate does not - // converge quickly, it probably will not ever converge, so further - // refinement steps will not produce a better answer. - if (RefStep.size() != 1) { - Diags.Report(diag::err_drv_invalid_value) << Option << RefStep; - return false; - } - char RefStepChar = RefStep[0]; - if (RefStepChar < '0' || RefStepChar > '9') { - Diags.Report(diag::err_drv_invalid_value) << Option << RefStep; - return false; - } - } - return true; -} - -// Parse -mrecip. Return the Value string if well-formed. -// Otherwise, return an empty string and issue a diagnosic message if needed. -StringRef tools::parseMRecipOption(clang::DiagnosticsEngine &Diags, - const ArgList &Args) { - StringRef DisabledPrefixIn = "!"; - StringRef DisabledPrefixOut = "!"; - StringRef EnabledPrefixOut = ""; - StringRef Out = ""; - - Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ); - if (!A) - return ""; - - unsigned NumOptions = A->getNumValues(); - if (NumOptions == 0) { - // No option is the same as "all". - return "all"; - } - - // Pass through "all", "none", or "default" with an optional refinement step. - if (NumOptions == 1) { - StringRef Val = A->getValue(0); - size_t RefStepLoc; - if (!getRefinementStep(Val, Diags, *A, RefStepLoc)) - return ""; - StringRef ValBase = Val.slice(0, RefStepLoc); - if (ValBase == "all" || ValBase == "none" || ValBase == "default") { - return Val; - } - } - - // Each reciprocal type may be enabled or disabled individually. - // Check each input value for validity, concatenate them all back together, - // and pass through. - - llvm::StringMap OptionStrings; - OptionStrings.insert(std::make_pair("divd", false)); - OptionStrings.insert(std::make_pair("divf", false)); - OptionStrings.insert(std::make_pair("divh", false)); - OptionStrings.insert(std::make_pair("vec-divd", false)); - OptionStrings.insert(std::make_pair("vec-divf", false)); - OptionStrings.insert(std::make_pair("vec-divh", false)); - OptionStrings.insert(std::make_pair("sqrtd", false)); - OptionStrings.insert(std::make_pair("sqrtf", false)); - OptionStrings.insert(std::make_pair("sqrth", false)); - OptionStrings.insert(std::make_pair("vec-sqrtd", false)); - OptionStrings.insert(std::make_pair("vec-sqrtf", false)); - OptionStrings.insert(std::make_pair("vec-sqrth", false)); - - for (unsigned i = 0; i != NumOptions; ++i) { - StringRef Val = A->getValue(i); - - bool IsDisabled = Val.starts_with(DisabledPrefixIn); - // Ignore the disablement token for string matching. - if (IsDisabled) - Val = Val.substr(1); - - size_t RefStep; - if (!getRefinementStep(Val, Diags, *A, RefStep)) - return ""; - - StringRef ValBase = Val.slice(0, RefStep); - llvm::StringMap::iterator OptionIter = OptionStrings.find(ValBase); - if (OptionIter == OptionStrings.end()) { - // Try again specifying float suffix. - OptionIter = OptionStrings.find(ValBase.str() + 'f'); - if (OptionIter == OptionStrings.end()) { - // The input name did not match any known option string. - Diags.Report(diag::err_drv_unknown_argument) << Val; - return ""; - } - // The option was specified without a half or float or double suffix. - // Make sure that the double or half entry was not already specified. - // The float entry will be checked below. - if (OptionStrings[ValBase.str() + 'd'] || - OptionStrings[ValBase.str() + 'h']) { - Diags.Report(diag::err_drv_invalid_value) - << A->getOption().getName() << Val; - return ""; - } - } - - if (OptionIter->second == true) { - // Duplicate option specified. - Diags.Report(diag::err_drv_invalid_value) - << A->getOption().getName() << Val; - return ""; - } - - // Mark the matched option as found. Do not allow duplicate specifiers. - OptionIter->second = true; - - // If the precision was not specified, also mark the double and half entry - // as found. - if (ValBase.back() != 'f' && ValBase.back() != 'd' && - ValBase.back() != 'h') { - OptionStrings[ValBase.str() + 'd'] = true; - OptionStrings[ValBase.str() + 'h'] = true; - } - - // Build the output string. - StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut; - Out = Args.MakeArgString(Out + Prefix + Val); - if (i != NumOptions - 1) - Out = Args.MakeArgString(Out + ","); - } - - return Out; -} - std::string tools::complexRangeKindToStr(LangOptions::ComplexRangeKind Range) { switch (Range) { case LangOptions::ComplexRangeKind::CX_Full: diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 413386e8288c9..b7a35d6ab5195 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -11,6 +11,7 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Driver/CommonArgs.h" +#include "clang/Options/OptionUtils.h" #include "clang/Options/Options.h" #include "llvm/Frontend/Debug/Options.h" #include "llvm/Support/Path.h" diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 38d56a24ef006..80ce547789b2b 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -168,10 +168,9 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, // For SPIR-V the inputs for the job are device AMDGCN SPIR-V flavoured bitcode // and the output is either a compiled SPIR-V binary or bitcode (-emit-llvm). It -// calls llvm-link and then the llvm-spirv translator. Once the SPIR-V BE will -// be promoted from experimental, we will switch to using that. TODO: consider -// if we want to run any targeted optimisations over IR here, over generic -// SPIR-V. +// calls llvm-link and then the llvm-spirv translator or the SPIR-V BE. +// TODO: consider if we want to run any targeted optimisations over IR here, +// over generic SPIR-V. void AMDGCN::Linker::constructLinkAndEmitSpirvCommand( Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const InputInfo &Output, const llvm::opt::ArgList &Args) const { @@ -182,17 +181,41 @@ void AMDGCN::Linker::constructLinkAndEmitSpirvCommand( const char *LinkedBCFilePath = HIP::getTempFile(C, LinkedBCFilePrefix, "bc"); InputInfo LinkedBCFile(&JA, LinkedBCFilePath, Output.getBaseInput()); + bool UseSPIRVBackend = + Args.hasFlag(options::OPT_use_spirv_backend, + options::OPT_no_use_spirv_backend, /*Default=*/false); + constructLlvmLinkCommand(C, JA, Inputs, LinkedBCFile, Args); - // Emit SPIR-V binary. - llvm::opt::ArgStringList TrArgs{ - "--spirv-max-version=1.6", - "--spirv-ext=+all", - "--spirv-allow-unknown-intrinsics", - "--spirv-lower-const-expr", - "--spirv-preserve-auxdata", - "--spirv-debug-info-version=nonsemantic-shader-200"}; - SPIRV::constructTranslateCommand(C, *this, JA, Output, LinkedBCFile, TrArgs); + if (UseSPIRVBackend) { + // This code handles the case in the new driver when --offload-device-only + // is unset and clang-linker-wrapper forwards the bitcode that must be + // compiled to SPIR-V. + + llvm::opt::ArgStringList CmdArgs; + const char *Triple = + C.getArgs().MakeArgString("-triple=spirv64-amd-amdhsa"); + + CmdArgs.append({"-cc1", Triple, "-emit-obj", "-disable-llvm-optzns", + LinkedBCFile.getFilename(), "-o", Output.getFilename()}); + + const Driver &Driver = getToolChain().getDriver(); + const char *Exec = Driver.getClangProgramPath(); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, LinkedBCFile, + Output, Driver.getPrependArg())); + } else { + // Emit SPIR-V binary using the translator + llvm::opt::ArgStringList TrArgs{ + "--spirv-max-version=1.6", + "--spirv-ext=+all", + "--spirv-allow-unknown-intrinsics", + "--spirv-lower-const-expr", + "--spirv-preserve-auxdata", + "--spirv-debug-info-version=nonsemantic-shader-200"}; + SPIRV::constructTranslateCommand(C, *this, JA, Output, LinkedBCFile, + TrArgs); + } } // For amdgcn the inputs of the linker job are device bitcode and output is diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index f93725c8c9887..8d2758bebe0fd 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -1156,12 +1156,12 @@ SanitizerMask Linux::getSupportedSanitizers() const { if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || IsLoongArch64 || IsRISCV64) Res |= SanitizerKind::Thread; - if (IsX86_64 || IsAArch64) + if (IsX86_64 || IsAArch64 || IsSystemZ) Res |= SanitizerKind::Type; if (IsX86_64 || IsSystemZ || IsPowerPC64) Res |= SanitizerKind::KernelMemory; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || - IsPowerPC64 || IsHexagon || IsLoongArch64 || IsRISCV64) + IsPowerPC64 || IsHexagon || IsLoongArch64 || IsRISCV64 || IsSystemZ) Res |= SanitizerKind::Scudo; if (IsX86_64 || IsAArch64 || IsRISCV64) { Res |= SanitizerKind::HWAddress; diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 4b4d5785c21a0..c7357bcd9e367 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -44,6 +44,7 @@ #include "clang/Frontend/FrontendOptions.h" #include "clang/Frontend/MultiplexConsumer.h" #include "clang/Frontend/PrecompiledPreamble.h" +#include "clang/Frontend/StandaloneDiagnostic.h" #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/HeaderSearchOptions.h" @@ -210,15 +211,6 @@ getBufferForFileHandlingRemapping(const CompilerInvocation &Invocation, return llvm::MemoryBuffer::getMemBufferCopy(Buffer->getBuffer(), FilePath); } -struct ASTUnit::ASTWriterData { - SmallString<128> Buffer; - llvm::BitstreamWriter Stream; - ASTWriter Writer; - - ASTWriterData(ModuleCache &ModCache, const CodeGenOptions &CGOpts) - : Stream(Buffer), Writer(Stream, Buffer, ModCache, CGOpts, {}) {} -}; - void ASTUnit::clearFileLevelDecls() { FileDecls.clear(); } @@ -581,73 +573,24 @@ class ASTInfoCollector : public ASTReaderListener { Counter = NewCounter; } }; +} // anonymous namespace -/// Diagnostic consumer that saves each diagnostic it is given. -class FilterAndStoreDiagnosticConsumer : public DiagnosticConsumer { - SmallVectorImpl *StoredDiags; - SmallVectorImpl *StandaloneDiags; - bool CaptureNonErrorsFromIncludes = true; - const LangOptions *LangOpts = nullptr; - SourceManager *SourceMgr = nullptr; - -public: - FilterAndStoreDiagnosticConsumer( - SmallVectorImpl *StoredDiags, - SmallVectorImpl *StandaloneDiags, - bool CaptureNonErrorsFromIncludes) - : StoredDiags(StoredDiags), StandaloneDiags(StandaloneDiags), - CaptureNonErrorsFromIncludes(CaptureNonErrorsFromIncludes) { - assert((StoredDiags || StandaloneDiags) && - "No output collections were passed to StoredDiagnosticConsumer."); - } - - void BeginSourceFile(const LangOptions &LangOpts, - const Preprocessor *PP = nullptr) override { - this->LangOpts = &LangOpts; - if (PP) - SourceMgr = &PP->getSourceManager(); - } - - void HandleDiagnostic(DiagnosticsEngine::Level Level, - const Diagnostic &Info) override; -}; - -/// RAII object that optionally captures and filters diagnostics, if -/// there is no diagnostic client to capture them already. -class CaptureDroppedDiagnostics { - DiagnosticsEngine &Diags; - FilterAndStoreDiagnosticConsumer Client; - DiagnosticConsumer *PreviousClient = nullptr; - std::unique_ptr OwningPreviousClient; - -public: - CaptureDroppedDiagnostics( - CaptureDiagsKind CaptureDiagnostics, DiagnosticsEngine &Diags, - SmallVectorImpl *StoredDiags, - SmallVectorImpl *StandaloneDiags) - : Diags(Diags), - Client(StoredDiags, StandaloneDiags, - CaptureDiagnostics != - CaptureDiagsKind::AllWithoutNonErrorsFromIncludes) { - if (CaptureDiagnostics != CaptureDiagsKind::None || - Diags.getClient() == nullptr) { - OwningPreviousClient = Diags.takeClient(); - PreviousClient = Diags.getClient(); - Diags.setClient(&Client, false); - } - } - - ~CaptureDroppedDiagnostics() { - if (Diags.getClient() == &Client) - Diags.setClient(PreviousClient, !!OwningPreviousClient.release()); - } -}; - -} // namespace +FilterAndStoreDiagnosticConsumer::FilterAndStoreDiagnosticConsumer( + SmallVectorImpl *StoredDiags, + SmallVectorImpl *StandaloneDiags, + bool CaptureNonErrorsFromIncludes) + : StoredDiags(StoredDiags), StandaloneDiags(StandaloneDiags), + CaptureNonErrorsFromIncludes(CaptureNonErrorsFromIncludes) { + assert((StoredDiags || StandaloneDiags) && + "No output collections were passed to StoredDiagnosticConsumer."); +} -static ASTUnit::StandaloneDiagnostic -makeStandaloneDiagnostic(const LangOptions &LangOpts, - const StoredDiagnostic &InDiag); +void FilterAndStoreDiagnosticConsumer::BeginSourceFile( + const LangOptions &LangOpts, const Preprocessor *PP) { + this->LangOpts = &LangOpts; + if (PP) + SourceMgr = &PP->getSourceManager(); +} static bool isInMainFile(const clang::Diagnostic &D) { if (!D.hasSourceManager() || !D.getLocation().isValid()) @@ -683,12 +626,32 @@ void FilterAndStoreDiagnosticConsumer::HandleDiagnostic( StoredDiag.emplace(Level, Info); ResultDiag = &*StoredDiag; } - StandaloneDiags->push_back( - makeStandaloneDiagnostic(*LangOpts, *ResultDiag)); + StandaloneDiags->emplace_back(*LangOpts, *ResultDiag); } } } +CaptureDroppedDiagnostics::CaptureDroppedDiagnostics( + CaptureDiagsKind CaptureDiagnostics, DiagnosticsEngine &Diags, + SmallVectorImpl *StoredDiags, + SmallVectorImpl *StandaloneDiags) + : Diags(Diags), + Client(StoredDiags, StandaloneDiags, + CaptureDiagnostics != + CaptureDiagsKind::AllWithoutNonErrorsFromIncludes) { + if (CaptureDiagnostics != CaptureDiagsKind::None || + Diags.getClient() == nullptr) { + OwningPreviousClient = Diags.takeClient(); + PreviousClient = Diags.getClient(); + Diags.setClient(&Client, false); + } +} + +CaptureDroppedDiagnostics::~CaptureDroppedDiagnostics() { + if (Diags.getClient() == &Client) + Diags.setClient(PreviousClient, !!OwningPreviousClient.release()); +} + IntrusiveRefCntPtr ASTUnit::getASTReader() const { return Reader; } @@ -1110,7 +1073,7 @@ class ASTUnitPreambleCallbacks : public PreambleCallbacks { unsigned Hash = 0; std::vector TopLevelDecls; std::vector TopLevelDeclIDs; - llvm::SmallVector PreambleDiags; + llvm::SmallVector PreambleDiags; }; } // namespace @@ -1259,10 +1222,17 @@ bool ASTUnit::Parse(std::shared_ptr PCHContainerOps, if (!Act->BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) return true; - if (SavedMainFileBuffer) - TranslateStoredDiagnostics(getFileManager(), getSourceManager(), - PreambleDiagnostics, StoredDiagnostics); - else + if (SavedMainFileBuffer) { + StoredDiagnostics.clear(); + StoredDiagnostics.reserve(PreambleDiagnostics.size()); + llvm::transform(std::move(PreambleDiagnostics), + std::back_inserter(StoredDiagnostics), + [&](auto &&StandaloneDiag) { + return translateStandaloneDiag( + getFileManager(), getSourceManager(), + std::move(StandaloneDiag), PreambleSrcLocCache); + }); + } else PreambleSrcLocCache.clear(); if (llvm::Error Err = Act->Execute()) { @@ -1281,51 +1251,6 @@ bool ASTUnit::Parse(std::shared_ptr PCHContainerOps, return false; } -static std::pair -makeStandaloneRange(CharSourceRange Range, const SourceManager &SM, - const LangOptions &LangOpts) { - CharSourceRange FileRange = Lexer::makeFileCharRange(Range, SM, LangOpts); - unsigned Offset = SM.getFileOffset(FileRange.getBegin()); - unsigned EndOffset = SM.getFileOffset(FileRange.getEnd()); - return std::make_pair(Offset, EndOffset); -} - -static ASTUnit::StandaloneFixIt makeStandaloneFixIt(const SourceManager &SM, - const LangOptions &LangOpts, - const FixItHint &InFix) { - ASTUnit::StandaloneFixIt OutFix; - OutFix.RemoveRange = makeStandaloneRange(InFix.RemoveRange, SM, LangOpts); - OutFix.InsertFromRange = - makeStandaloneRange(InFix.InsertFromRange, SM, LangOpts); - OutFix.CodeToInsert = InFix.CodeToInsert; - OutFix.BeforePreviousInsertions = InFix.BeforePreviousInsertions; - return OutFix; -} - -static ASTUnit::StandaloneDiagnostic -makeStandaloneDiagnostic(const LangOptions &LangOpts, - const StoredDiagnostic &InDiag) { - ASTUnit::StandaloneDiagnostic OutDiag; - OutDiag.ID = InDiag.getID(); - OutDiag.Level = InDiag.getLevel(); - OutDiag.Message = std::string(InDiag.getMessage()); - OutDiag.LocOffset = 0; - if (InDiag.getLocation().isInvalid()) - return OutDiag; - const SourceManager &SM = InDiag.getLocation().getManager(); - SourceLocation FileLoc = SM.getFileLoc(InDiag.getLocation()); - OutDiag.Filename = std::string(SM.getFilename(FileLoc)); - if (OutDiag.Filename.empty()) - return OutDiag; - OutDiag.LocOffset = SM.getFileOffset(FileLoc); - for (const auto &Range : InDiag.getRanges()) - OutDiag.Ranges.push_back(makeStandaloneRange(Range, SM, LangOpts)); - for (const auto &FixIt : InDiag.getFixIts()) - OutDiag.FixIts.push_back(makeStandaloneFixIt(SM, LangOpts, FixIt)); - - return OutDiag; -} - /// Attempt to build or re-use a precompiled preamble when (re-)parsing /// the source file. /// @@ -1780,114 +1705,6 @@ std::unique_ptr ASTUnit::LoadFromCompilerInvocation( return AST; } -std::unique_ptr ASTUnit::LoadFromCommandLine( - const char **ArgBegin, const char **ArgEnd, - std::shared_ptr PCHContainerOps, - std::shared_ptr DiagOpts, - IntrusiveRefCntPtr Diags, StringRef ResourceFilesPath, - bool StorePreamblesInMemory, StringRef PreambleStoragePath, - bool OnlyLocalDecls, CaptureDiagsKind CaptureDiagnostics, - ArrayRef RemappedFiles, bool RemappedFilesKeepOriginalName, - unsigned PrecompilePreambleAfterNParses, TranslationUnitKind TUKind, - bool CacheCodeCompletionResults, bool IncludeBriefCommentsInCodeCompletion, - bool AllowPCHWithCompilerErrors, SkipFunctionBodiesScope SkipFunctionBodies, - bool SingleFileParse, bool UserFilesAreVolatile, bool ForSerialization, - bool RetainExcludedConditionalBlocks, std::optional ModuleFormat, - std::unique_ptr *ErrAST, - IntrusiveRefCntPtr VFS) { - assert(Diags.get() && "no DiagnosticsEngine was provided"); - - // If no VFS was provided, create one that tracks the physical file system. - // If '-working-directory' was passed as an argument, 'createInvocation' will - // set this as the current working directory of the VFS. - if (!VFS) - VFS = llvm::vfs::createPhysicalFileSystem(); - - SmallVector StoredDiagnostics; - - std::shared_ptr CI; - - { - CaptureDroppedDiagnostics Capture(CaptureDiagnostics, *Diags, - &StoredDiagnostics, nullptr); - - CreateInvocationOptions CIOpts; - CIOpts.VFS = VFS; - CIOpts.Diags = Diags; - CIOpts.ProbePrecompiled = true; // FIXME: historical default. Needed? - CI = createInvocation(llvm::ArrayRef(ArgBegin, ArgEnd), std::move(CIOpts)); - if (!CI) - return nullptr; - } - - // Override any files that need remapping - for (const auto &RemappedFile : RemappedFiles) { - CI->getPreprocessorOpts().addRemappedFile(RemappedFile.first, - RemappedFile.second); - } - PreprocessorOptions &PPOpts = CI->getPreprocessorOpts(); - PPOpts.RemappedFilesKeepOriginalName = RemappedFilesKeepOriginalName; - PPOpts.AllowPCHWithCompilerErrors = AllowPCHWithCompilerErrors; - PPOpts.SingleFileParseMode = SingleFileParse; - PPOpts.RetainExcludedConditionalBlocks = RetainExcludedConditionalBlocks; - - // Override the resources path. - CI->getHeaderSearchOpts().ResourceDir = std::string(ResourceFilesPath); - - CI->getFrontendOpts().SkipFunctionBodies = - SkipFunctionBodies == SkipFunctionBodiesScope::PreambleAndMainFile; - - if (ModuleFormat) - CI->getHeaderSearchOpts().ModuleFormat = std::string(*ModuleFormat); - - // Create the AST unit. - std::unique_ptr AST; - AST.reset(new ASTUnit(false)); - AST->NumStoredDiagnosticsFromDriver = StoredDiagnostics.size(); - AST->StoredDiagnostics.swap(StoredDiagnostics); - ConfigureDiags(Diags, *AST, CaptureDiagnostics); - AST->DiagOpts = DiagOpts; - AST->Diagnostics = Diags; - AST->FileSystemOpts = CI->getFileSystemOpts(); - AST->CodeGenOpts = std::make_unique(CI->getCodeGenOpts()); - VFS = createVFSFromCompilerInvocation(*CI, *Diags, VFS); - AST->FileMgr = - llvm::makeIntrusiveRefCnt(AST->FileSystemOpts, VFS); - AST->StorePreamblesInMemory = StorePreamblesInMemory; - AST->PreambleStoragePath = PreambleStoragePath; - AST->ModCache = createCrossProcessModuleCache(); - AST->OnlyLocalDecls = OnlyLocalDecls; - AST->CaptureDiagnostics = CaptureDiagnostics; - AST->TUKind = TUKind; - AST->ShouldCacheCodeCompletionResults = CacheCodeCompletionResults; - AST->IncludeBriefCommentsInCodeCompletion = - IncludeBriefCommentsInCodeCompletion; - AST->UserFilesAreVolatile = UserFilesAreVolatile; - AST->Invocation = CI; - AST->SkipFunctionBodies = SkipFunctionBodies; - if (ForSerialization) - AST->WriterData.reset(new ASTWriterData(*AST->ModCache, *AST->CodeGenOpts)); - // Zero out now to ease cleanup during crash recovery. - CI = nullptr; - Diags = nullptr; - - // Recover resources if we crash before exiting this method. - llvm::CrashRecoveryContextCleanupRegistrar ASTUnitCleanup(AST.get()); - - if (AST->LoadFromCompilerInvocation(std::move(PCHContainerOps), - PrecompilePreambleAfterNParses, VFS)) { - // Some error occurred, if caller wants to examine diagnostics, pass it the - // ASTUnit. - if (ErrAST) { - AST->StoredDiagnostics.swap(AST->FailedParseDiagnostics); - ErrAST->swap(AST); - } - return nullptr; - } - - return AST; -} - bool ASTUnit::Reparse(std::shared_ptr PCHContainerOps, ArrayRef RemappedFiles, IntrusiveRefCntPtr VFS) { @@ -2406,64 +2223,6 @@ bool ASTUnit::serialize(raw_ostream &OS) { return serializeUnit(Writer, Buffer, getSema(), OS); } -void ASTUnit::TranslateStoredDiagnostics( - FileManager &FileMgr, SourceManager &SrcMgr, - const SmallVectorImpl &Diags, - SmallVectorImpl &Out) { - // Map the standalone diagnostic into the new source manager. We also need to - // remap all the locations to the new view. This includes the diag location, - // any associated source ranges, and the source ranges of associated fix-its. - // FIXME: There should be a cleaner way to do this. - SmallVector Result; - Result.reserve(Diags.size()); - - for (const auto &SD : Diags) { - // Rebuild the StoredDiagnostic. - if (SD.Filename.empty()) - continue; - auto FE = FileMgr.getOptionalFileRef(SD.Filename); - if (!FE) - continue; - SourceLocation FileLoc; - auto ItFileID = PreambleSrcLocCache.find(SD.Filename); - if (ItFileID == PreambleSrcLocCache.end()) { - FileID FID = SrcMgr.translateFile(*FE); - FileLoc = SrcMgr.getLocForStartOfFile(FID); - PreambleSrcLocCache[SD.Filename] = FileLoc; - } else { - FileLoc = ItFileID->getValue(); - } - - if (FileLoc.isInvalid()) - continue; - SourceLocation L = FileLoc.getLocWithOffset(SD.LocOffset); - FullSourceLoc Loc(L, SrcMgr); - - SmallVector Ranges; - Ranges.reserve(SD.Ranges.size()); - for (const auto &Range : SD.Ranges) { - SourceLocation BL = FileLoc.getLocWithOffset(Range.first); - SourceLocation EL = FileLoc.getLocWithOffset(Range.second); - Ranges.push_back(CharSourceRange::getCharRange(BL, EL)); - } - - SmallVector FixIts; - FixIts.reserve(SD.FixIts.size()); - for (const auto &FixIt : SD.FixIts) { - FixIts.push_back(FixItHint()); - FixItHint &FH = FixIts.back(); - FH.CodeToInsert = FixIt.CodeToInsert; - SourceLocation BL = FileLoc.getLocWithOffset(FixIt.RemoveRange.first); - SourceLocation EL = FileLoc.getLocWithOffset(FixIt.RemoveRange.second); - FH.RemoveRange = CharSourceRange::getCharRange(BL, EL); - } - - Result.push_back( - StoredDiagnostic(SD.Level, SD.ID, SD.Message, Loc, Ranges, FixIts)); - } - Result.swap(Out); -} - void ASTUnit::addFileLevelDecl(Decl *D) { assert(D); diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt index dac9e0d26f393..634f239933605 100644 --- a/clang/lib/Frontend/CMakeLists.txt +++ b/clang/lib/Frontend/CMakeLists.txt @@ -17,7 +17,6 @@ add_clang_library(clangFrontend ChainedIncludesSource.cpp CompilerInstance.cpp CompilerInvocation.cpp - CreateInvocationFromCommandLine.cpp DependencyFile.cpp DependencyGraph.cpp DiagnosticRenderer.cpp @@ -36,6 +35,7 @@ add_clang_library(clangFrontend SARIFDiagnosticPrinter.cpp SerializedDiagnosticPrinter.cpp SerializedDiagnosticReader.cpp + StandaloneDiagnostic.cpp TestModuleFileExtension.cpp TextDiagnostic.cpp TextDiagnosticBuffer.cpp @@ -51,7 +51,6 @@ add_clang_library(clangFrontend clangAPINotes clangAST clangBasic - clangDriver clangOptions clangEdit clangLex diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index fd9d78d9ae689..834ddbc8420d9 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -27,7 +27,6 @@ #include "clang/Basic/Version.h" #include "clang/Basic/XRayInstr.h" #include "clang/Config/config.h" -#include "clang/Driver/Driver.h" #include "clang/Frontend/CommandLineSourceLoc.h" #include "clang/Frontend/DependencyOutputOptions.h" #include "clang/Frontend/FrontendOptions.h" @@ -3277,13 +3276,6 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, return Diags.getNumErrors() == NumErrorsBefore; } -std::string CompilerInvocation::GetResourcesPath(const char *Argv0, - void *MainAddr) { - std::string ClangExecutable = - llvm::sys::fs::getMainExecutable(Argv0, MainAddr); - return driver::Driver::GetResourcesPath(ClangExecutable); -} - static void GenerateHeaderSearchArgs(const HeaderSearchOptions &Opts, ArgumentConsumer Consumer) { const HeaderSearchOptions *HeaderSearchOpts = &Opts; @@ -4023,21 +4015,7 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts, std::to_string(*Opts.AllocTokenMax)); if (Opts.AllocTokenMode) { - StringRef S; - switch (*Opts.AllocTokenMode) { - case llvm::AllocTokenMode::Increment: - S = "increment"; - break; - case llvm::AllocTokenMode::Random: - S = "random"; - break; - case llvm::AllocTokenMode::TypeHash: - S = "typehash"; - break; - case llvm::AllocTokenMode::TypeHashPointerSplit: - S = "typehashpointersplit"; - break; - } + StringRef S = llvm::getAllocTokenModeAsString(*Opts.AllocTokenMode); GenerateArg(Consumer, OPT_falloc_token_mode_EQ, S); } } diff --git a/clang/lib/Frontend/StandaloneDiagnostic.cpp b/clang/lib/Frontend/StandaloneDiagnostic.cpp new file mode 100644 index 0000000000000..4f19c91b7d266 --- /dev/null +++ b/clang/lib/Frontend/StandaloneDiagnostic.cpp @@ -0,0 +1,117 @@ +//===--- StandaloneDiagnostic.h - Serializable Diagnostic ------------- ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Frontend/StandaloneDiagnostic.h" +#include "clang/Lex/Lexer.h" + +namespace clang { + +StandaloneDiagnostic::SourceOffsetRange::SourceOffsetRange( + CharSourceRange Range, const SourceManager &SrcMgr, + const LangOptions &LangOpts) { + const auto FileRange = Lexer::makeFileCharRange(Range, SrcMgr, LangOpts); + Begin = SrcMgr.getFileOffset(FileRange.getBegin()); + End = SrcMgr.getFileOffset(FileRange.getEnd()); +} + +StandaloneDiagnostic::StandaloneFixIt::StandaloneFixIt( + const SourceManager &SrcMgr, const LangOptions &LangOpts, + const FixItHint &FixIt) + : RemoveRange(FixIt.RemoveRange, SrcMgr, LangOpts), + InsertFromRange(FixIt.InsertFromRange, SrcMgr, LangOpts), + CodeToInsert(FixIt.CodeToInsert), + BeforePreviousInsertions(FixIt.BeforePreviousInsertions) {} + +StandaloneDiagnostic::StandaloneDiagnostic(const LangOptions &LangOpts, + const StoredDiagnostic &InDiag) + : Level(InDiag.getLevel()), ID(InDiag.getID()), + Message(InDiag.getMessage()) { + const FullSourceLoc &FullLoc = InDiag.getLocation(); + // This is not an invalid diagnostic; invalid SourceLocations are used to + // represent diagnostics without a specific SourceLocation. + if (FullLoc.isInvalid()) + return; + + const auto &SrcMgr = FullLoc.getManager(); + FileKind = SrcMgr.getFileCharacteristic(static_cast(FullLoc)); + const auto FileLoc = SrcMgr.getFileLoc(static_cast(FullLoc)); + FileOffset = SrcMgr.getFileOffset(FileLoc); + Filename = SrcMgr.getFilename(FileLoc); + assert(!Filename.empty() && "diagnostic with location has no source file?"); + + Ranges.reserve(InDiag.getRanges().size()); + for (const auto &Range : InDiag.getRanges()) + Ranges.emplace_back(Range, SrcMgr, LangOpts); + + FixIts.reserve(InDiag.getFixIts().size()); + for (const auto &FixIt : InDiag.getFixIts()) + FixIts.emplace_back(SrcMgr, LangOpts, FixIt); +} + +StoredDiagnostic +translateStandaloneDiag(FileManager &FileMgr, SourceManager &SrcMgr, + const StandaloneDiagnostic &StandaloneDiag, + llvm::StringMap &SrcLocCache) { + const auto FileRef = FileMgr.getOptionalFileRef(StandaloneDiag.Filename); + if (!FileRef) + return StoredDiagnostic(StandaloneDiag.Level, StandaloneDiag.ID, + StandaloneDiag.Message); + + // Try to get FileLoc from cache first + SourceLocation FileLoc; + auto It = SrcLocCache.find(StandaloneDiag.Filename); + if (It != SrcLocCache.end()) { + FileLoc = It->getValue(); + } + + // Cache miss - compute and cache the location + if (FileLoc.isInvalid()) { + const auto FileID = + SrcMgr.getOrCreateFileID(*FileRef, StandaloneDiag.FileKind); + FileLoc = SrcMgr.getLocForStartOfFile(FileID); + + if (FileLoc.isInvalid()) + return StoredDiagnostic(StandaloneDiag.Level, StandaloneDiag.ID, + StandaloneDiag.Message); + + SrcLocCache[StandaloneDiag.Filename] = FileLoc; + } + + const auto DiagLoc = FileLoc.getLocWithOffset(StandaloneDiag.FileOffset); + const FullSourceLoc Loc(DiagLoc, SrcMgr); + + auto ConvertOffsetRange = + [&](const StandaloneDiagnostic::SourceOffsetRange &Range) { + return CharSourceRange( + SourceRange(FileLoc.getLocWithOffset(Range.Begin), + FileLoc.getLocWithOffset(Range.End)), + /*IsTokenRange*/ false); + }; + + SmallVector TranslatedRanges; + TranslatedRanges.reserve(StandaloneDiag.Ranges.size()); + transform(StandaloneDiag.Ranges, std::back_inserter(TranslatedRanges), + ConvertOffsetRange); + + SmallVector TranslatedFixIts; + TranslatedFixIts.reserve(StandaloneDiag.FixIts.size()); + for (const auto &FixIt : StandaloneDiag.FixIts) { + FixItHint TranslatedFixIt; + TranslatedFixIt.CodeToInsert = FixIt.CodeToInsert; + TranslatedFixIt.RemoveRange = ConvertOffsetRange(FixIt.RemoveRange); + TranslatedFixIt.InsertFromRange = ConvertOffsetRange(FixIt.InsertFromRange); + TranslatedFixIt.BeforePreviousInsertions = FixIt.BeforePreviousInsertions; + TranslatedFixIts.push_back(std::move(TranslatedFixIt)); + } + + return StoredDiagnostic(StandaloneDiag.Level, StandaloneDiag.ID, + StandaloneDiag.Message, Loc, TranslatedRanges, + TranslatedFixIts); +} + +} // namespace clang diff --git a/clang/lib/Interpreter/CMakeLists.txt b/clang/lib/Interpreter/CMakeLists.txt index 37faa0302caaa..9a597146b2fc4 100644 --- a/clang/lib/Interpreter/CMakeLists.txt +++ b/clang/lib/Interpreter/CMakeLists.txt @@ -46,6 +46,7 @@ add_clang_library(clangInterpreter clangFrontend clangFrontendTool clangLex + clangOptions clangParse clangSema clangSerialization diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 7764fa7dc92b9..6cbc5e9910bcc 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -42,6 +42,7 @@ #include "clang/Interpreter/Interpreter.h" #include "clang/Interpreter/Value.h" #include "clang/Lex/PreprocessorOptions.h" +#include "clang/Options/OptionUtils.h" #include "clang/Options/Options.h" #include "clang/Sema/Lookup.h" #include "clang/Serialization/ObjectFilePCHContainerReader.h" @@ -105,7 +106,7 @@ CreateCI(const llvm::opt::ArgStringList &Argv) { if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && Clang->getHeaderSearchOpts().ResourceDir.empty()) Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv[0], nullptr); + GetResourcesPath(Argv[0], nullptr); Clang->createVirtualFileSystem(); diff --git a/clang/lib/Options/OptionUtils.cpp b/clang/lib/Options/OptionUtils.cpp index fcafd3c83c6b3..e5aefa012f679 100644 --- a/clang/lib/Options/OptionUtils.cpp +++ b/clang/lib/Options/OptionUtils.cpp @@ -9,7 +9,12 @@ #include "clang/Options/OptionUtils.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticDriver.h" +#include "clang/Basic/Version.h" +#include "clang/Config/config.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" using namespace clang; using namespace llvm::opt; @@ -31,17 +36,211 @@ IntTy getLastArgIntValueImpl(const ArgList &Args, OptSpecifier Id, } } // namespace -namespace clang { - -int getLastArgIntValue(const ArgList &Args, OptSpecifier Id, int Default, - DiagnosticsEngine *Diags, unsigned Base) { +int clang::getLastArgIntValue(const ArgList &Args, OptSpecifier Id, int Default, + DiagnosticsEngine *Diags, unsigned Base) { return getLastArgIntValueImpl(Args, Id, Default, Diags, Base); } -uint64_t getLastArgUInt64Value(const ArgList &Args, OptSpecifier Id, - uint64_t Default, DiagnosticsEngine *Diags, - unsigned Base) { +uint64_t clang::getLastArgUInt64Value(const ArgList &Args, OptSpecifier Id, + uint64_t Default, + DiagnosticsEngine *Diags, unsigned Base) { return getLastArgIntValueImpl(Args, Id, Default, Diags, Base); } -} // namespace clang +StringRef clang::parseMPreferVectorWidthOption(clang::DiagnosticsEngine &Diags, + const llvm::opt::ArgList &Args) { + const Arg *A = Args.getLastArg(options::OPT_mprefer_vector_width_EQ); + if (!A) + return ""; + + StringRef Value = A->getValue(); + unsigned Width LLVM_ATTRIBUTE_UNINITIALIZED; + + // Only "none" and Integer values are accepted by + // -mprefer-vector-width=. + if (Value != "none" && Value.getAsInteger(10, Width)) { + Diags.Report(clang::diag::err_drv_invalid_value) + << A->getOption().getName() << Value; + return ""; + } + + return Value; +} + +// This is a helper function for validating the optional refinement step +// parameter in reciprocal argument strings. Return false if there is an error +// parsing the refinement step. Otherwise, return true and set the Position +// of the refinement step in the input string. +static bool getRefinementStep(StringRef In, clang::DiagnosticsEngine &Diags, + const Arg &A, size_t &Position) { + const char RefinementStepToken = ':'; + Position = In.find(RefinementStepToken); + if (Position != StringRef::npos) { + StringRef Option = A.getOption().getName(); + StringRef RefStep = In.substr(Position + 1); + // Allow exactly one numeric character for the additional refinement + // step parameter. This is reasonable for all currently-supported + // operations and architectures because we would expect that a larger value + // of refinement steps would cause the estimate "optimization" to + // under-perform the native operation. Also, if the estimate does not + // converge quickly, it probably will not ever converge, so further + // refinement steps will not produce a better answer. + if (RefStep.size() != 1) { + Diags.Report(diag::err_drv_invalid_value) << Option << RefStep; + return false; + } + char RefStepChar = RefStep[0]; + if (RefStepChar < '0' || RefStepChar > '9') { + Diags.Report(diag::err_drv_invalid_value) << Option << RefStep; + return false; + } + } + return true; +} + +StringRef clang::parseMRecipOption(clang::DiagnosticsEngine &Diags, + const ArgList &Args) { + StringRef DisabledPrefixIn = "!"; + StringRef DisabledPrefixOut = "!"; + StringRef EnabledPrefixOut = ""; + StringRef Out = ""; + + const Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ); + if (!A) + return ""; + + const unsigned NumOptions = A->getNumValues(); + if (NumOptions == 0) { + // No option is the same as "all". + return "all"; + } + + // Pass through "all", "none", or "default" with an optional refinement step. + if (NumOptions == 1) { + StringRef Val = A->getValue(0); + size_t RefStepLoc; + if (!getRefinementStep(Val, Diags, *A, RefStepLoc)) + return ""; + StringRef ValBase = Val.slice(0, RefStepLoc); + if (ValBase == "all" || ValBase == "none" || ValBase == "default") { + return Val; + } + } + + // Each reciprocal type may be enabled or disabled individually. + // Check each input value for validity, concatenate them all back together, + // and pass through. + + llvm::StringMap OptionStrings; + OptionStrings.insert(std::make_pair("divd", false)); + OptionStrings.insert(std::make_pair("divf", false)); + OptionStrings.insert(std::make_pair("divh", false)); + OptionStrings.insert(std::make_pair("vec-divd", false)); + OptionStrings.insert(std::make_pair("vec-divf", false)); + OptionStrings.insert(std::make_pair("vec-divh", false)); + OptionStrings.insert(std::make_pair("sqrtd", false)); + OptionStrings.insert(std::make_pair("sqrtf", false)); + OptionStrings.insert(std::make_pair("sqrth", false)); + OptionStrings.insert(std::make_pair("vec-sqrtd", false)); + OptionStrings.insert(std::make_pair("vec-sqrtf", false)); + OptionStrings.insert(std::make_pair("vec-sqrth", false)); + + for (unsigned i = 0; i != NumOptions; ++i) { + StringRef Val = A->getValue(i); + + bool IsDisabled = Val.starts_with(DisabledPrefixIn); + // Ignore the disablement token for string matching. + if (IsDisabled) + Val = Val.substr(1); + + size_t RefStep; + if (!getRefinementStep(Val, Diags, *A, RefStep)) + return ""; + + StringRef ValBase = Val.slice(0, RefStep); + llvm::StringMap::iterator OptionIter = OptionStrings.find(ValBase); + if (OptionIter == OptionStrings.end()) { + // Try again specifying float suffix. + OptionIter = OptionStrings.find(ValBase.str() + 'f'); + if (OptionIter == OptionStrings.end()) { + // The input name did not match any known option string. + Diags.Report(diag::err_drv_unknown_argument) << Val; + return ""; + } + // The option was specified without a half or float or double suffix. + // Make sure that the double or half entry was not already specified. + // The float entry will be checked below. + if (OptionStrings[ValBase.str() + 'd'] || + OptionStrings[ValBase.str() + 'h']) { + Diags.Report(diag::err_drv_invalid_value) + << A->getOption().getName() << Val; + return ""; + } + } + + if (OptionIter->second == true) { + // Duplicate option specified. + Diags.Report(diag::err_drv_invalid_value) + << A->getOption().getName() << Val; + return ""; + } + + // Mark the matched option as found. Do not allow duplicate specifiers. + OptionIter->second = true; + + // If the precision was not specified, also mark the double and half entry + // as found. + if (ValBase.back() != 'f' && ValBase.back() != 'd' && + ValBase.back() != 'h') { + OptionStrings[ValBase.str() + 'd'] = true; + OptionStrings[ValBase.str() + 'h'] = true; + } + + // Build the output string. + StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut; + Out = Args.MakeArgString(Out + Prefix + Val); + if (i != NumOptions - 1) + Out = Args.MakeArgString(Out + ","); + } + + return Out; +} + +std::string clang::GetResourcesPath(StringRef BinaryPath) { + // Since the resource directory is embedded in the module hash, it's important + // that all places that need it call this function, so that they get the + // exact same string ("a/../b/" and "b/" get different hashes, for example). + + // Dir is bin/ or lib/, depending on where BinaryPath is. + StringRef Dir = llvm::sys::path::parent_path(BinaryPath); + SmallString<128> P(Dir); + + StringRef ConfiguredResourceDir(CLANG_RESOURCE_DIR); + if (!ConfiguredResourceDir.empty()) { + // FIXME: We should fix the behavior of llvm::sys::path::append so we don't + // need to check for absolute paths here. + if (llvm::sys::path::is_absolute(ConfiguredResourceDir)) + P = ConfiguredResourceDir; + else + llvm::sys::path::append(P, ConfiguredResourceDir); + } else { + // On Windows, libclang.dll is in bin/. + // On non-Windows, libclang.so/.dylib is in lib/. + // With a static-library build of libclang, LibClangPath will contain the + // path of the embedding binary, which for LLVM binaries will be in bin/. + // ../lib gets us to lib/ in both cases. + P = llvm::sys::path::parent_path(Dir); + // This search path is also created in the COFF driver of lld, so any + // changes here also needs to happen in lld/COFF/Driver.cpp + llvm::sys::path::append(P, CLANG_INSTALL_LIBDIR_BASENAME, "clang", + CLANG_VERSION_MAJOR_STRING); + } + + return std::string(P); +} + +std::string clang::GetResourcesPath(const char *Argv0, void *MainAddr) { + const std::string ClangExecutable = + llvm::sys::fs::getMainExecutable(Argv0, MainAddr); + return GetResourcesPath(ClangExecutable); +} diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 9841453a37d35..ad9929f6194dc 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6753,14 +6753,13 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, checkDirectCallValidity(*this, Fn, FD, ArgExprs); - // If this expression is a call to a builtin function in HIP device - // compilation, allow a pointer-type argument to default address space to be - // passed as a pointer-type parameter to a non-default address space. - // If Arg is declared in the default address space and Param is declared - // in a non-default address space, perform an implicit address space cast to - // the parameter type. - if (getLangOpts().HIP && getLangOpts().CUDAIsDevice && FD && - FD->getBuiltinID()) { + // If this expression is a call to a builtin function in HIP compilation, + // allow a pointer-type argument to default address space to be passed as a + // pointer-type parameter to a non-default address space. If Arg is declared + // in the default address space and Param is declared in a non-default + // address space, perform an implicit address space cast to the parameter + // type. + if (getLangOpts().HIP && FD && FD->getBuiltinID()) { for (unsigned Idx = 0; Idx < ArgExprs.size() && Idx < FD->param_size(); ++Idx) { ParmVarDecl *Param = FD->getParamDecl(Idx); diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp index 9bae12454d2dc..1d55f615de8a9 100644 --- a/clang/lib/Tooling/Tooling.cpp +++ b/clang/lib/Tooling/Tooling.cpp @@ -31,6 +31,7 @@ #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/PreprocessorOptions.h" +#include "clang/Options/OptionUtils.h" #include "clang/Options/Options.h" #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" @@ -510,8 +511,7 @@ static void injectResourceDir(CommandLineArguments &Args, const char *Argv0, // If there's no override in place add our resource dir. Args = getInsertArgumentAdjuster( - ("-resource-dir=" + CompilerInvocation::GetResourcesPath(Argv0, MainAddr)) - .c_str())(Args, ""); + ("-resource-dir=" + GetResourcesPath(Argv0, MainAddr)).c_str())(Args, ""); } int ClangTool::run(ToolAction *Action) { diff --git a/clang/test/DebugInfo/Generic/dbg-info-all-calls-described.c b/clang/test/DebugInfo/Generic/dbg-info-all-calls-described.c index 3ca3aaa0b70f4..0ba4767c8ddda 100644 --- a/clang/test/DebugInfo/Generic/dbg-info-all-calls-described.c +++ b/clang/test/DebugInfo/Generic/dbg-info-all-calls-described.c @@ -59,6 +59,13 @@ // RUN: -debug-info-kind=standalone -dwarf-version=4 \ // RUN: | FileCheck %s -check-prefix=NO-ATTR +// Disabled by feature flag (enabled by default) +// RUN: %clang_cc1 -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -O1 -disable-llvm-passes \ +// RUN: -debug-info-kind=standalone -dwarf-version=5 \ +// RUN: -gno-call-site-info \ +// RUN: | FileCheck %s -check-prefix=NO-ATTR + // NO-ATTR-NOT: FlagAllCallsDescribed // HAS-ATTR-DAG: DISubprogram(name: "declaration1", {{.*}}, spFlags: DISPFlagOptimized) diff --git a/clang/test/Driver/debug-options.c b/clang/test/Driver/debug-options.c index 45ac450ac8faa..27e2728f15948 100644 --- a/clang/test/Driver/debug-options.c +++ b/clang/test/Driver/debug-options.c @@ -297,6 +297,9 @@ // RUN: %clang -### -g -gno-column-info %s 2>&1 \ // RUN: | FileCheck -check-prefix=NOCI %s // +// RUN: %clang -### -g -gno-call-site-info %s 2>&1 \ +// RUN: | FileCheck -check-prefix=NOCALLSITE %s +// // RUN: %clang -### -g -target x86_64-unknown-unknown %s 2>&1 \ // | FileCheck -check-prefix=CI %s // @@ -426,6 +429,8 @@ // // NOCI-DAG: "-gno-column-info" // +// NOCALLSITE: "-gno-call-site-info" +// // GEXTREFS: "-dwarf-ext-refs" "-fmodule-format=obj" // GEXTREFS: "-debug-info-kind={{standalone|constructor}}" // NOGEXTREFS-NOT: -dwarf-ext-refs diff --git a/clang/test/Driver/hip-spirv-backend-bindings.c b/clang/test/Driver/hip-spirv-backend-bindings.c new file mode 100644 index 0000000000000..59b3f4fb54d4c --- /dev/null +++ b/clang/test/Driver/hip-spirv-backend-bindings.c @@ -0,0 +1,57 @@ +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend -ccc-print-bindings \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-BASE,CHECK-SPIRV + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend -fgpu-rdc -ccc-print-bindings \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-BASE,CHECK-SPIRV-RDC + +// CHECK-SPIRV-BASE: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HIPI:.+\.hipi]]" +// CHECK-SPIRV-BASE: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[HIPI]]"], output: "[[SPV_BC:.+\.bc]]" +// CHECK-SPIRV: # "spirv64-amd-amdhsa" - "Offload::Packager", inputs: ["[[SPV_BC]]"], output: "[[HIP_OUT:.+\.out]]" +// CHECK-SPIRV: # "spirv64-amd-amdhsa" - "Offload::Linker", inputs: ["[[HIP_OUT]]"], output: "[[HIPFB:.+\.hipfb]]" +// CHECK-SPIRV-RDC: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[SPV_BC]]"], output: "[[HIP_OUT:.+\.out]]" +// CHECK-SPIRV-BASE: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]"], output: "[[HIPI:.+\.hipi]]" +// CHECK-SPIRV: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HIPI]]", "[[HIPFB]]"], output: "[[x86_BC:.+\.bc]]" +// CHECK-SPIRV-RDC: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HIPI]]", "[[HIP_OUT]]"], output: "[[x86_BC:.+\.bc]]" +// CHECK-SPIRV-BASE: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[x86_BC]]"], output: "[[x86_S:.+\.s]]" +// CHECK-SPIRV-BASE: # "x86_64-unknown-linux-gnu" - "clang::as", inputs: ["[[x86_S]]"], output: "[[x86_O:.+\.o]]" +// CHECK-SPIRV-BASE: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[x86_O]]"], output: "{{.+\.out}}" + +// CHECK-SPIRV # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[x86_O]]"], output: "[[x86_O:.+\.o]]" +// CHECK-SPIRV # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[x86_O]]"], output: "{{.+\.out}}" + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -ccc-print-bindings \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-OFFLOAD-DEVICE-ONLY + +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HIPI:.+\.hipi]]" +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[HIPI]]"], output: "[[SPV_BC:.+\.bc]]" +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[SPV_BC]]"], output: "[[SPV_OUT:.+\.out]]" +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[SPV_OUT]]"], output: "{{.+\.hipfb}}" + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -fgpu-rdc -ccc-print-bindings \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC + +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HIPI:.+\.hipi]]" +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[HIPI]]"], output: "[[SPV_BC:.+\.bc]]" +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[SPV_BC]]"], output: "{{.+}}" + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -S -fgpu-rdc -ccc-print-bindings \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -S -ccc-print-bindings \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY + +// CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HIPI:.+\.hipi]]" +// CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[HIPI]]"], output: "[[SPV_BC:.+\.bc]]" +// CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[SPV_BC]]"], output: "{{.+\.s}}" diff --git a/clang/test/Driver/hip-spirv-backend-opt.c b/clang/test/Driver/hip-spirv-backend-opt.c new file mode 100644 index 0000000000000..88c4a848d5760 --- /dev/null +++ b/clang/test/Driver/hip-spirv-backend-opt.c @@ -0,0 +1,61 @@ +// This test case validates the behavior of -use-spirv-backend + +// --offload-device-only is always set --- testing interactions with -S and -fgpu-rdc + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -### -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -S \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-TEXTUAL + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -### -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-BINARY + +// The new driver's behavior is to emit LLVM IR for --offload-device-only and -fgpu-rdc (independently of SPIR-V). +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -### -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -S -fgpu-rdc \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-LL,CHECK-FGPU-RDC + +// The new driver's behavior is to emit LLVM IR for --offload-device-only and -fgpu-rdc (independently of SPIR-V). +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -### -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -fgpu-rdc \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-BC,CHECK-FGPU-RDC + +// --offload-device-only is always unset --- testing interactions with -S and -fgpu-rdc + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -### -x hip %s -save-temps \ +// RUN: -use-spirv-backend -S -fgpu-rdc \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-BC,CHECK-FGPU-RDC + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -### -x hip %s -save-temps \ +// RUN: -use-spirv-backend -S \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-BC + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -### -x hip %s -save-temps \ +// RUN: -use-spirv-backend -fgpu-rdc \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-BC,CHECK-CLANG-LINKER-WRAPPER + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -### -x hip %s -save-temps \ +// RUN: -use-spirv-backend \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-BC,CHECK-CLANG-LINKER-WRAPPER + +// RUN: %clang --no-offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -### -x hip %s -save-temps \ +// RUN: -use-spirv-backend \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-BC,CHECK-SPIRV-BACKEND-BINARY-EQ-TRIPLE + +// CHECK-SPIRV-TRANSLATOR-NOT: "{{.*llvm-spirv.*}}" +// CHECK-SPIRV-BACKEND-TEXTUAL: "{{.*}}clang{{.*}}" "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}} "-S" +// CHECK-SPIRV-BACKEND-BINARY: "{{.*}}clang{{.*}}" "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}} "-emit-obj" +// CHECK-SPIRV-BACKEND-BC: "{{.*}}clang{{.*}}" "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}} "-emit-llvm-bc" +// CHECK-SPIRV-BACKEND-LL: "{{.*}}clang{{.*}}" "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}} "-emit-llvm" +// CHECK-SPIRV-BACKEND-BINARY-EQ-TRIPLE: "{{.*}}clang{{.*}}" "-cc1" {{.*}}"-triple=spirv64-amd-amdhsa" {{.*}}"-emit-obj" +// CHECK-FGPU-RDC-SAME: {{.*}} "-fgpu-rdc" +// CHECK-CLANG-LINKER-WRAPPER: "{{.*}}clang-linker-wrapper" "--should-extract=amdgcnspirv" {{.*}} "--device-compiler=spirv64-amd-amdhsa=-use-spirv-backend" diff --git a/clang/test/Driver/hip-spirv-backend-phases.c b/clang/test/Driver/hip-spirv-backend-phases.c new file mode 100644 index 0000000000000..d743b8cd50c40 --- /dev/null +++ b/clang/test/Driver/hip-spirv-backend-phases.c @@ -0,0 +1,80 @@ +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend -ccc-print-phases \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-BINARY + +// CHECK-SPIRV-BINARY: [[P0:[0-9]+]]: input, "[[INPUT:.*]].c", hip, (host-hip) +// CHECK-SPIRV-BINARY: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, hip-cpp-output, (host-hip) +// CHECK-SPIRV-BINARY: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-hip) + +// CHECK-SPIRV-BINARY: [[P3:[0-9]+]]: input, "[[INPUT]].c", hip, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, hip-cpp-output, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY: [[P6:[0-9]+]]: offload, "device-hip (spirv64-amd-amdhsa:amdgcnspirv)" {[[P5]]}, ir +// CHECK-SPIRV-BINARY: [[P7:[0-9]+]]: llvm-offload-binary, {[[P6]]}, image, (device-hip) +// CHECK-SPIRV-BINARY: [[P8:[0-9]+]]: clang-linker-wrapper, {[[P7]]}, hip-fatbin, (device-hip) + +// CHECK-SPIRV-BINARY: [[P9:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (spirv64-amd-amdhsa)" {[[P8]]}, ir +// CHECK-SPIRV-BINARY: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (host-hip) +// CHECK-SPIRV-BINARY: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (host-hip) +// CHECK-SPIRV-BINARY: [[P12:[0-9]+]]: clang-linker-wrapper, {[[P11]]}, image, (host-hip) + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend -fgpu-rdc -ccc-print-phases \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-BINARY-RDC + +// CHECK-SPIRV-BINARY-RDC: [[P0:[0-9]+]]: input, "[[INPUT:.*]].c", hip, (host-hip) +// CHECK-SPIRV-BINARY-RDC: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, hip-cpp-output, (host-hip) +// CHECK-SPIRV-BINARY-RDC: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-hip) + +// CHECK-SPIRV-BINARY-RDC: [[P3:[0-9]+]]: input, "[[INPUT]].c", hip, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY-RDC: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, hip-cpp-output, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY-RDC: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY-RDC: [[P6:[0-9]+]]: offload, "device-hip (spirv64-amd-amdhsa:amdgcnspirv)" {[[P5]]}, ir +// CHECK-SPIRV-BINARY-RDC: [[P7:[0-9]+]]: llvm-offload-binary, {[[P6]]}, image, (device-hip) + +// CHECK-SPIRV-BINARY-RDC: [[P8:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (x86_64-unknown-linux-gnu)" {[[P7]]}, ir +// CHECK-SPIRV-BINARY-RDC: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (host-hip) +// CHECK-SPIRV-BINARY-RDC: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (host-hip) +// CHECK-SPIRV-BINARY-RDC: [[P11:[0-9]+]]: clang-linker-wrapper, {[[P10]]}, image, (host-hip) + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -ccc-print-phases \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY + +// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P0:[0-9]+]]: input, "{{.*}}.c", hip, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, hip-cpp-output, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P3:[0-9]+]]: backend, {[[P2]]}, image, (device-hip, amdgcnspirv) +// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P4:[0-9]+]]: offload, "device-hip (spirv64-amd-amdhsa:amdgcnspirv)" {[[P3]]}, image +// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P5:[0-9]+]]: linker, {[[P4]]}, hip-fatbin, (device-hip) +// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P6:[0-9]+]]: offload, "device-hip (spirv64-amd-amdhsa)" {[[P5]]}, none + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -fgpu-rdc -ccc-print-phases \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC + +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P0:[0-9]+]]: input, "{{.*}}.c", hip, (device-hip, amdgcnspirv) +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, hip-cpp-output, (device-hip, amdgcnspirv) +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-hip, amdgcnspirv) +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-hip, amdgcnspirv) +// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P4:[0-9]+]]: offload, "device-hip (spirv64-amd-amdhsa:amdgcnspirv)" {[[P3]]}, none + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -S -fgpu-rdc -ccc-print-phases \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC + +// RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu --offload-arch=amdgcnspirv \ +// RUN: -nogpuinc -nogpulib -x hip %s -save-temps \ +// RUN: -use-spirv-backend --offload-device-only -S -ccc-print-phases \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY + +// CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY: [[P0:[0-9]+]]: input, "{{.*}}.c", hip, (device-hip, amdgcnspirv) +// CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, hip-cpp-output, (device-hip, amdgcnspirv) +// CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-hip, amdgcnspirv) +// CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-hip, amdgcnspirv) +// CHECK-SPIRV-TEXTUAL-OFFLOAD-DEVICE-ONLY: [[P4:[0-9]+]]: offload, "device-hip (spirv64-amd-amdhsa:amdgcnspirv)" {[[P3]]}, none diff --git a/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip index 366278f648939..b49c1866caa1c 100644 --- a/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip +++ b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip @@ -1,7 +1,7 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx950 -verify=device %s -fcuda-is-device -// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify=host %s -// device-no-diagnostics +// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx950 -verify %s -fcuda-is-device +// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s +// expected-no-diagnostics #define __device__ __attribute__((device)) #define __global__ __attribute__((global)) @@ -20,11 +20,11 @@ __device__ void i_am_device(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ v __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0); __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0); - __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} - __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} - __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} - __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} - __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} + __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); + __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); + __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); + __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); + __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0); __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0); @@ -46,11 +46,11 @@ __global__ void i_am_kernel(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ v __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0); __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0); - __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} - __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} - __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} - __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} - __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}} + __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); + __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); + __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); + __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); + __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0); __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0); diff --git a/clang/tools/c-index-test/CMakeLists.txt b/clang/tools/c-index-test/CMakeLists.txt index 24e7c9692ca56..41e80e66ffa7a 100644 --- a/clang/tools/c-index-test/CMakeLists.txt +++ b/clang/tools/c-index-test/CMakeLists.txt @@ -27,6 +27,7 @@ else() libclang clangAST clangBasic + clangDriver clangFrontend clangIndex clangSerialization diff --git a/clang/tools/c-index-test/core_main.cpp b/clang/tools/c-index-test/core_main.cpp index 5a3086a7fc08f..c67479fd130ca 100644 --- a/clang/tools/c-index-test/core_main.cpp +++ b/clang/tools/c-index-test/core_main.cpp @@ -8,6 +8,7 @@ #include "clang/AST/Mangle.h" #include "clang/Basic/LangOptions.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" diff --git a/clang/tools/diagtool/CMakeLists.txt b/clang/tools/diagtool/CMakeLists.txt index b49619c075c73..09b2a81790f87 100644 --- a/clang/tools/diagtool/CMakeLists.txt +++ b/clang/tools/diagtool/CMakeLists.txt @@ -15,5 +15,6 @@ add_clang_tool(diagtool clang_target_link_libraries(diagtool PRIVATE clangBasic + clangDriver clangFrontend ) diff --git a/clang/tools/diagtool/ShowEnabledWarnings.cpp b/clang/tools/diagtool/ShowEnabledWarnings.cpp index bea0288c09358..5b25e656dafa4 100644 --- a/clang/tools/diagtool/ShowEnabledWarnings.cpp +++ b/clang/tools/diagtool/ShowEnabledWarnings.cpp @@ -9,6 +9,7 @@ #include "DiagTool.h" #include "DiagnosticNames.h" #include "clang/Basic/LLVM.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Frontend/TextDiagnosticPrinter.h" diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp index 300d59df1bf7b..cc757039cafd0 100644 --- a/clang/tools/driver/cc1_main.cpp +++ b/clang/tools/driver/cc1_main.cpp @@ -17,6 +17,7 @@ #include "clang/Basic/TargetOptions.h" #include "clang/CodeGen/ObjectFilePCHContainerWriter.h" #include "clang/Config/config.h" +#include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" @@ -269,7 +270,7 @@ int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && Clang->getHeaderSearchOpts().ResourceDir.empty()) Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv0, MainAddr); + GetResourcesPath(Argv0, MainAddr); /// Create the actual file system. Clang->createVirtualFileSystem(llvm::vfs::getRealFileSystem(), DiagsBuffer); diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index f4d6fa72a1dfe..32e84248c1b27 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -38,6 +38,7 @@ #include "clang/Basic/Stack.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Version.h" +#include "clang/Driver/CreateASTUnitFromArgs.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Index/CommentToXML.h" @@ -4361,7 +4362,7 @@ clang_parseTranslationUnit_Impl(CXIndex CIdx, const char *source_filename, LibclangInvocationReporter InvocationReporter( *CXXIdx, LibclangInvocationReporter::OperationKind::ParseOperation, options, llvm::ArrayRef(*Args), /*InvocationArgs=*/{}, unsaved_files); - std::unique_ptr Unit = ASTUnit::LoadFromCommandLine( + std::unique_ptr Unit = CreateASTUnitFromCommandLine( Args->data(), Args->data() + Args->size(), CXXIdx->getPCHContainerOperations(), DiagOpts, Diags, CXXIdx->getClangResourcesPath(), CXXIdx->getStorePreamblesInMemory(), diff --git a/clang/tools/libclang/CIndexer.cpp b/clang/tools/libclang/CIndexer.cpp index 11d9312b64849..853a936b43e37 100644 --- a/clang/tools/libclang/CIndexer.cpp +++ b/clang/tools/libclang/CIndexer.cpp @@ -16,6 +16,7 @@ #include "clang/Basic/Version.h" #include "clang/Config/config.h" #include "clang/Driver/Driver.h" +#include "clang/Options/OptionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/FileSystem.h" @@ -137,7 +138,7 @@ const std::string &CIndexer::getClangResourcesPath() { #endif // Cache our result. - ResourcesPath = driver::Driver::GetResourcesPath(LibClangPath); + ResourcesPath = GetResourcesPath(LibClangPath); return ResourcesPath; } diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt index e0ff7605b68b8..b0105f5a5f79f 100644 --- a/clang/tools/libclang/CMakeLists.txt +++ b/clang/tools/libclang/CMakeLists.txt @@ -65,6 +65,7 @@ set(LIBS clangFrontend clangIndex clangLex + clangOptions clangRewrite clangSema clangSerialization diff --git a/clang/tools/libclang/Indexing.cpp b/clang/tools/libclang/Indexing.cpp index c142f142d5071..75323d70afcfe 100644 --- a/clang/tools/libclang/Indexing.cpp +++ b/clang/tools/libclang/Indexing.cpp @@ -15,6 +15,7 @@ #include "CXString.h" #include "CXTranslationUnit.h" #include "clang/AST/ASTConsumer.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" diff --git a/clang/unittests/Driver/DXCModeTest.cpp b/clang/unittests/Driver/DXCModeTest.cpp index e0454f190b35a..130da620b40b5 100644 --- a/clang/unittests/Driver/DXCModeTest.cpp +++ b/clang/unittests/Driver/DXCModeTest.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/TargetOptions.h" #include "clang/Driver/Compilation.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Driver/Driver.h" #include "clang/Driver/ToolChain.h" #include "clang/Frontend/CompilerInstance.h" diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp index afa17ff219be2..8f533790ec501 100644 --- a/clang/unittests/Driver/ToolChainTest.cpp +++ b/clang/unittests/Driver/ToolChainTest.cpp @@ -17,6 +17,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "clang/Driver/Compilation.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Driver/Driver.h" #include "clang/Frontend/CompilerInstance.h" #include "llvm/ADT/ArrayRef.h" diff --git a/clang/unittests/Frontend/ASTUnitTest.cpp b/clang/unittests/Frontend/ASTUnitTest.cpp index dfdbe90e72f1f..bf9e4e184b5db 100644 --- a/clang/unittests/Frontend/ASTUnitTest.cpp +++ b/clang/unittests/Frontend/ASTUnitTest.cpp @@ -9,6 +9,8 @@ #include #include "clang/Basic/FileManager.h" +#include "clang/Driver/CreateASTUnitFromArgs.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" @@ -173,7 +175,7 @@ TEST_F(ASTUnitTest, LoadFromCommandLineEarlyError) { auto PCHContainerOps = std::make_shared(); std::unique_ptr ErrUnit; - std::unique_ptr AST = ASTUnit::LoadFromCommandLine( + std::unique_ptr AST = CreateASTUnitFromCommandLine( &Args[0], &Args[4], PCHContainerOps, DiagOpts, Diags, "", false, "", false, CaptureDiagsKind::All, {}, true, 0, TU_Complete, false, false, false, SkipFunctionBodiesScope::None, false, true, false, false, @@ -201,7 +203,7 @@ TEST_F(ASTUnitTest, LoadFromCommandLineWorkingDirectory) { auto PCHContainerOps = std::make_shared(); std::unique_ptr ErrUnit; - std::unique_ptr AST = ASTUnit::LoadFromCommandLine( + std::unique_ptr AST = CreateASTUnitFromCommandLine( &Args[0], &Args[4], PCHContainerOps, DiagOpts, Diags, "", false, "", false, CaptureDiagsKind::All, {}, true, 0, TU_Complete, false, false, false, SkipFunctionBodiesScope::None, false, true, false, false, diff --git a/clang/unittests/Frontend/CompilerInstanceTest.cpp b/clang/unittests/Frontend/CompilerInstanceTest.cpp index cd3fefa1ea994..39d35b48f394a 100644 --- a/clang/unittests/Frontend/CompilerInstanceTest.cpp +++ b/clang/unittests/Frontend/CompilerInstanceTest.cpp @@ -8,6 +8,7 @@ #include "clang/Frontend/CompilerInstance.h" #include "clang/Basic/FileManager.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Frontend/TextDiagnosticPrinter.h" diff --git a/clang/unittests/Frontend/UtilsTest.cpp b/clang/unittests/Frontend/UtilsTest.cpp index fc411e4af705f..a82733d57714a 100644 --- a/clang/unittests/Frontend/UtilsTest.cpp +++ b/clang/unittests/Frontend/UtilsTest.cpp @@ -9,6 +9,7 @@ #include "clang/Frontend/Utils.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetOptions.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Lex/PreprocessorOptions.h" diff --git a/clang/unittests/Sema/CMakeLists.txt b/clang/unittests/Sema/CMakeLists.txt index b61ed8c457635..188f6135a60ac 100644 --- a/clang/unittests/Sema/CMakeLists.txt +++ b/clang/unittests/Sema/CMakeLists.txt @@ -13,6 +13,7 @@ add_distinct_clang_unittest(SemaTests clangAST clangASTMatchers clangBasic + clangDriver clangFrontend clangParse clangSema diff --git a/clang/unittests/Sema/SemaNoloadLookupTest.cpp b/clang/unittests/Sema/SemaNoloadLookupTest.cpp index e565372698e5e..3944269eff502 100644 --- a/clang/unittests/Sema/SemaNoloadLookupTest.cpp +++ b/clang/unittests/Sema/SemaNoloadLookupTest.cpp @@ -10,6 +10,7 @@ #include "clang/AST/DeclarationName.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Frontend/FrontendActions.h" diff --git a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp index edf33ae04230b..b76dcfec96063 100644 --- a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp +++ b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp @@ -9,6 +9,7 @@ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Basic/FileManager.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" diff --git a/clang/unittests/Serialization/LoadSpecLazilyTest.cpp b/clang/unittests/Serialization/LoadSpecLazilyTest.cpp index d7b55491fddac..f55925aeae1f2 100644 --- a/clang/unittests/Serialization/LoadSpecLazilyTest.cpp +++ b/clang/unittests/Serialization/LoadSpecLazilyTest.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Frontend/FrontendActions.h" diff --git a/clang/unittests/Serialization/ModuleCacheTest.cpp b/clang/unittests/Serialization/ModuleCacheTest.cpp index e9b8da3dba6af..df26e54588b9e 100644 --- a/clang/unittests/Serialization/ModuleCacheTest.cpp +++ b/clang/unittests/Serialization/ModuleCacheTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/FileManager.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" diff --git a/clang/unittests/Serialization/NoCommentsTest.cpp b/clang/unittests/Serialization/NoCommentsTest.cpp index 01bb6999a7c90..444a082bba907 100644 --- a/clang/unittests/Serialization/NoCommentsTest.cpp +++ b/clang/unittests/Serialization/NoCommentsTest.cpp @@ -9,6 +9,7 @@ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Basic/FileManager.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" diff --git a/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp index 55ee72875ead2..b826f20ce4d70 100644 --- a/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp +++ b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" diff --git a/clang/unittests/Serialization/VarDeclConstantInitTest.cpp b/clang/unittests/Serialization/VarDeclConstantInitTest.cpp index 743f851fc5fe1..2be01def49809 100644 --- a/clang/unittests/Serialization/VarDeclConstantInitTest.cpp +++ b/clang/unittests/Serialization/VarDeclConstantInitTest.cpp @@ -9,6 +9,7 @@ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Basic/FileManager.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index 47184cbf5d768..468ca5ddd2c75 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -20,6 +20,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.def" #include "clang/Basic/TokenKinds.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Frontend/Utils.h" diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp index b2be64fc08f3d..dad75854240ef 100644 --- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -13,6 +13,7 @@ #include "TreeTestBase.h" #include "clang/AST/ASTConsumer.h" #include "clang/Basic/LLVM.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendAction.h" diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index ca45d7bd2af7f..f2317de8916e9 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -89,7 +89,7 @@ else() set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X} ${LOONGARCH64} ${RISCV64}) endif() -set(ALL_TYSAN_SUPPORTED_ARCH ${X86_64} ${ARM64}) +set(ALL_TYSAN_SUPPORTED_ARCH ${X86_64} ${ARM64} ${S390X}) set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64} ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${LOONGARCH64}) @@ -102,7 +102,7 @@ endif() set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS64} ${HEXAGON} ${LOONGARCH64}) set(ALL_SCUDO_STANDALONE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} - ${MIPS32} ${MIPS64} ${PPC64} ${HEXAGON} ${LOONGARCH64} ${RISCV64}) + ${MIPS32} ${MIPS64} ${PPC64} ${HEXAGON} ${LOONGARCH64} ${RISCV64} ${S390X}) if(APPLE) set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64}) else() diff --git a/compiler-rt/lib/tysan/tysan_platform.h b/compiler-rt/lib/tysan/tysan_platform.h index 19f77f0cace6b..7d84ef228216d 100644 --- a/compiler-rt/lib/tysan/tysan_platform.h +++ b/compiler-rt/lib/tysan/tysan_platform.h @@ -45,6 +45,12 @@ struct Mapping48 { static const uptr kPtrShift = 3; }; #define TYSAN_RUNTIME_VMA 1 +#elif defined(__s390x__) +struct Mapping { + static const uptr kShadowAddr = 0x080000000000ULL; + static const uptr kAppAddr = 0x460000000000ULL; + static const uptr kAppMemMsk = ~0xC00000000000ULL; +}; #else #error "TySan not supported for this platform!" #endif diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index bb0b4a39cec9b..fb74b3dcb280e 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -75,7 +75,6 @@ add_flang_library(flangFrontend CLANG_LIBS clangBasic - clangDriver clangOptions ) diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 7cac9acefe702..76f7329d2d126 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -325,10 +325,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, for (auto *a : args.filtered(clang::options::OPT_fpass_plugin_EQ)) opts.LLVMPassPlugins.push_back(a->getValue()); - opts.Reciprocals = clang::driver::tools::parseMRecipOption(diags, args); + opts.Reciprocals = clang::parseMRecipOption(diags, args); - opts.PreferVectorWidth = - clang::driver::tools::parseMPreferVectorWidthOption(diags, args); + opts.PreferVectorWidth = clang::parseMPreferVectorWidthOption(diags, args); // -fembed-offload-object option for (auto *a : args.filtered(clang::options::OPT_fembed_offload_object_EQ)) diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 30bca639060e6..7f880d223d6c3 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -60,6 +60,7 @@ #include "lldb/lldb-forward.h" #include "lldb/lldb-private-enumerations.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt index 01d588ff6a78b..759a7c4dd14fb 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt +++ b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt @@ -51,10 +51,10 @@ add_lldb_library(lldbPluginExpressionParserClang CLANG_LIBS clangAST clangCodeGen - clangDriver clangEdit clangFrontend clangLex + clangOptions clangParse clangRewrite clangRewriteFrontend diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp index 6de851081598f..660a21e3c6a8d 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp @@ -10,7 +10,7 @@ #include "clang/Basic/Version.h" #include "clang/Config/config.h" -#include "clang/Driver/Driver.h" +#include "clang/Options/OptionUtils.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -53,7 +53,7 @@ static bool DefaultComputeClangResourceDirectory(FileSpec &lldb_shlib_spec, std::string raw_path = lldb_shlib_spec.GetPath(); llvm::StringRef parent_dir = llvm::sys::path::parent_path(raw_path); static const std::string clang_resource_path = - clang::driver::Driver::GetResourcesPath("bin/lldb"); + clang::GetResourcesPath("bin/lldb"); static const llvm::StringRef kResourceDirSuffixes[] = { // LLVM.org's build of LLDB uses the clang resource directory placed diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp index e37c84efefdc9..ce8dc50b84a31 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp @@ -10,6 +10,7 @@ #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Driver/CreateInvocationFromArgs.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Frontend/TextDiagnosticPrinter.h" diff --git a/lldb/unittests/Expression/ClangParserTest.cpp b/lldb/unittests/Expression/ClangParserTest.cpp index fab4487c73719..c949026e87cd8 100644 --- a/lldb/unittests/Expression/ClangParserTest.cpp +++ b/lldb/unittests/Expression/ClangParserTest.cpp @@ -8,7 +8,7 @@ #include "clang/Basic/Version.h" #include "clang/Config/config.h" -#include "clang/Driver/Driver.h" +#include "clang/Options/OptionUtils.h" #include "Plugins/ExpressionParser/Clang/ClangHost.h" #include "TestingSupport/SubsystemRAII.h" @@ -43,7 +43,7 @@ TEST_F(ClangHostTest, ComputeClangResourceDirectory) { std::string path_to_liblldb = "C:\\foo\\bar\\lib\\"; #endif std::string path_to_clang_dir = - clang::driver::Driver::GetResourcesPath(path_to_liblldb + "liblldb"); + clang::GetResourcesPath(path_to_liblldb + "liblldb"); llvm::SmallString<256> path_to_clang_lib_dir_real; llvm::sys::fs::real_path(path_to_clang_dir, path_to_clang_lib_dir_real); diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h index 434cfb61699d6..e2be91ba84c53 100644 --- a/llvm/include/llvm/Analysis/Delinearization.h +++ b/llvm/include/llvm/Analysis/Delinearization.h @@ -133,9 +133,8 @@ bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr, /// terms exist in the \p Expr. In other words, it assumes that the all step /// values are constant. /// -/// This function is intended to replace getIndexExpressionsFromGEP and -/// tryDelinearizeFixedSizeImpl. They rely on the GEP source element type so -/// that they will be removed in the future. +/// This function is intended to replace getIndexExpressionsFromGEP. They rely +/// on the GEP source element type so that will be removed in the future. bool delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes, @@ -155,17 +154,6 @@ bool getIndexExpressionsFromGEP(ScalarEvolution &SE, SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes); -/// Implementation of fixed size array delinearization. Try to delinearize -/// access function for a fixed size multi-dimensional array, by deriving -/// subscripts from GEP instructions. Returns true upon success and false -/// otherwise. \p Inst is the load/store instruction whose pointer operand is -/// the one we want to delinearize. \p AccessFn is its corresponding SCEV -/// expression w.r.t. the surrounding loop. -bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst, - const SCEV *AccessFn, - SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes); - struct DelinearizationPrinterPass : public PassInfoMixin { explicit DelinearizationPrinterPass(raw_ostream &OS); diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index f603ae8dbd70f..04731569aa3f2 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -773,8 +773,8 @@ class DependenceInfo { SmallVectorImpl &Pair); /// Tries to delinearize \p Src and \p Dst access functions for a fixed size - /// multi-dimensional array. Calls tryDelinearizeFixedSizeImpl() to - /// delinearize \p Src and \p Dst separately, + /// multi-dimensional array. Calls delinearizeFixedSizeArray() to delinearize + /// \p Src and \p Dst separately, bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index cf96547063cd0..41fe448382992 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -211,6 +211,16 @@ struct RuntimeLibcallsInfo { return true; } + static bool darwinHasMemsetPattern(const Triple &TT) { + // memset_pattern{4,8,16} is only available on iOS 3.0 and Mac OS X 10.5 and + // later. All versions of watchOS support it. + if (TT.isMacOSX()) + return !TT.isMacOSXVersionLT(10, 5); + if (TT.isiOS()) + return !TT.isOSVersionLT(3, 0); + return TT.isWatchOS(); + } + static bool hasAEABILibcalls(const Triple &TT) { return TT.isTargetAEABI() || TT.isTargetGNUAEABI() || TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isAndroid(); diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index 11e6127e0741d..794ab2449bc01 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -50,6 +50,7 @@ def isWindowsMSVCOrItaniumEnvironment : RuntimeLibcallPredicate< def isGNUEnvironment : RuntimeLibcallPredicate<"TT.isGNUEnvironment()">; def darwinHasSinCosStret : RuntimeLibcallPredicate<"darwinHasSinCosStret(TT)">; def darwinHasExp10 : RuntimeLibcallPredicate<"darwinHasExp10(TT)">; +def darwinHasMemsetPattern : RuntimeLibcallPredicate<[{darwinHasMemsetPattern(TT)}]>; def hasExp10 : RuntimeLibcallPredicate<[{!TT.isOSDarwin()}]>; @@ -154,6 +155,8 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in { def SINCOS_#FPTy : RuntimeLibcall; def REMQUO_#FPTy : RuntimeLibcall; def FDIM_#FPTy : RuntimeLibcall; + + def CABS_#FPTy : RuntimeLibcall; } foreach FPTy = [ "F32", "F64" ] in { @@ -382,7 +385,9 @@ def MEMMOVE : RuntimeLibcall; def MEMMOVE_CHK : RuntimeLibcall; def MEMSET : RuntimeLibcall; def MEMSET_CHK : RuntimeLibcall; +def MALLOC : RuntimeLibcall; def CALLOC : RuntimeLibcall; +def FREE : RuntimeLibcall; def BZERO : RuntimeLibcall; def STRLEN : RuntimeLibcall; @@ -569,6 +574,302 @@ def OBJC_RETAIN_AUTORELEASE : RuntimeLibcall; def OBJC_SYNC_ENTER : RuntimeLibcall; def OBJC_SYNC_EXIT : RuntimeLibcall; +def ABORT : RuntimeLibcall; +def ABS : RuntimeLibcall; +def ACCESS : RuntimeLibcall; +def ALIGNED_ALLOC : RuntimeLibcall; +def ATEXIT : RuntimeLibcall; +def ATOF : RuntimeLibcall; +def ATOI : RuntimeLibcall; +def ATOL : RuntimeLibcall; +def ATOLL : RuntimeLibcall; +def BCMP : RuntimeLibcall; +def BCOPY : RuntimeLibcall; +def CHMOD : RuntimeLibcall; +def CHOWN : RuntimeLibcall; +def CLEARERR : RuntimeLibcall; +def CLOSEDIR : RuntimeLibcall; +def CTERMID : RuntimeLibcall; +def CXA_ATEXIT : RuntimeLibcall; +def CXA_GUARD_ABORT : RuntimeLibcall; +def CXA_GUARD_ACQUIRE : RuntimeLibcall; +def CXA_GUARD_RELEASE : RuntimeLibcall; +def CXA_THROW : RuntimeLibcall; +def DUNDER_ISOC99_SCANF : RuntimeLibcall; +def DUNDER_ISOC99_SSCANF : RuntimeLibcall; +def DUNDER_STRDUP : RuntimeLibcall; +def DUNDER_STRNDUP : RuntimeLibcall; +def DUNDER_STRTOK_R : RuntimeLibcall; +def ENUM_VARIANT : RuntimeLibcall; +def EXECL : RuntimeLibcall; +def EXECLE : RuntimeLibcall; +def EXECLP : RuntimeLibcall; +def EXECV : RuntimeLibcall; +def EXECVE : RuntimeLibcall; +def EXECVP : RuntimeLibcall; +def EXECVPE : RuntimeLibcall; +def EXIT : RuntimeLibcall; +def FCLOSE : RuntimeLibcall; +def FDOPEN : RuntimeLibcall; +def FEOF : RuntimeLibcall; +def FERROR : RuntimeLibcall; +def FFLUSH : RuntimeLibcall; +def FFS : RuntimeLibcall; +def FFSL : RuntimeLibcall; +def FFSLL : RuntimeLibcall; +def FGETC : RuntimeLibcall; +def FGETC_UNLOCKED : RuntimeLibcall; +def FGETPOS : RuntimeLibcall; +def FGETS : RuntimeLibcall; +def FGETS_UNLOCKED : RuntimeLibcall; +def FILENO : RuntimeLibcall; +def FIPRINTF : RuntimeLibcall; +def FLOCKFILE : RuntimeLibcall; +def FLS : RuntimeLibcall; +def FLSL : RuntimeLibcall; +def FLSLL : RuntimeLibcall; +def FOPEN : RuntimeLibcall; +def FOPEN64 : RuntimeLibcall; +def FORK : RuntimeLibcall; +def FPRINTF : RuntimeLibcall; +def FPUTC : RuntimeLibcall; +def FPUTC_UNLOCKED : RuntimeLibcall; +def FPUTS : RuntimeLibcall; +def FPUTS_UNLOCKED : RuntimeLibcall; +def FREAD : RuntimeLibcall; +def FREAD_UNLOCKED : RuntimeLibcall; +def FSCANF : RuntimeLibcall; +def FSEEK : RuntimeLibcall; +def FSEEKO : RuntimeLibcall; +def FSEEKO64 : RuntimeLibcall; +def FSETPOS : RuntimeLibcall; +def FSTAT : RuntimeLibcall; +def FSTAT64 : RuntimeLibcall; +def FSTATVFS : RuntimeLibcall; +def FSTATVFS64 : RuntimeLibcall; +def FTELL : RuntimeLibcall; +def FTELLO : RuntimeLibcall; +def FTELLO64 : RuntimeLibcall; +def FTRYLOCKFILE : RuntimeLibcall; +def FUNLOCKFILE : RuntimeLibcall; +def FWRITE : RuntimeLibcall; +def FWRITE_UNLOCKED : RuntimeLibcall; +def GETC : RuntimeLibcall; +def GETCHAR : RuntimeLibcall; +def GETCHAR_UNLOCKED : RuntimeLibcall; +def GETC_UNLOCKED : RuntimeLibcall; +def GETENV : RuntimeLibcall; +def GETITIMER : RuntimeLibcall; +def GETLOGIN_R : RuntimeLibcall; +def GETPWNAM : RuntimeLibcall; +def GETS : RuntimeLibcall; +def GETTIMEOFDAY : RuntimeLibcall; +def HTONL : RuntimeLibcall; +def HTONS : RuntimeLibcall; +def IPRINTF : RuntimeLibcall; +def ISASCII : RuntimeLibcall; +def ISDIGIT : RuntimeLibcall; +def LABS : RuntimeLibcall; +def LCHOWN : RuntimeLibcall; +def LLABS : RuntimeLibcall; +def LSTAT : RuntimeLibcall; +def LSTAT64 : RuntimeLibcall; +def MEMALIGN : RuntimeLibcall; +def MEMCCPY : RuntimeLibcall; +def MEMCCPY_CHK : RuntimeLibcall; +def MEMCHR : RuntimeLibcall; +def MEMPCPY : RuntimeLibcall; +def MEMPCPY_CHK : RuntimeLibcall; +def MEMRCHR : RuntimeLibcall; +def MEMSET_PATTERN16 : RuntimeLibcall; +def MEMSET_PATTERN4 : RuntimeLibcall; +def MEMSET_PATTERN8 : RuntimeLibcall; +def MKDIR : RuntimeLibcall; +def MKTIME : RuntimeLibcall; +def MSVC_DELETE_ARRAY_PTR32 : RuntimeLibcall; +def MSVC_DELETE_ARRAY_PTR32_INT : RuntimeLibcall; +def MSVC_DELETE_ARRAY_PTR32_NOTHROW : RuntimeLibcall; +def MSVC_DELETE_ARRAY_PTR64 : RuntimeLibcall; +def MSVC_DELETE_ARRAY_PTR64_LONGLONG : RuntimeLibcall; +def MSVC_DELETE_ARRAY_PTR64_NOTHROW : RuntimeLibcall; +def MSVC_DELETE_PTR32 : RuntimeLibcall; +def MSVC_DELETE_PTR32_INT : RuntimeLibcall; +def MSVC_DELETE_PTR32_NOTHROW : RuntimeLibcall; +def MSVC_DELETE_PTR64 : RuntimeLibcall; +def MSVC_DELETE_PTR64_LONGLONG : RuntimeLibcall; +def MSVC_DELETE_PTR64_NOTHROW : RuntimeLibcall; +def MSVC_NEW_ARRAY_INT : RuntimeLibcall; +def MSVC_NEW_ARRAY_INT_NOTHROW : RuntimeLibcall; +def MSVC_NEW_ARRAY_LONGLONG : RuntimeLibcall; +def MSVC_NEW_ARRAY_LONGLONG_NOTHROW : RuntimeLibcall; +def MSVC_NEW_INT : RuntimeLibcall; +def MSVC_NEW_INT_NOTHROW : RuntimeLibcall; +def MSVC_NEW_LONGLONG : RuntimeLibcall; +def MSVC_NEW_LONGLONG_NOTHROW : RuntimeLibcall; +def NTOHL : RuntimeLibcall; +def NTOHS : RuntimeLibcall; +def OPEN : RuntimeLibcall; +def OPEN64 : RuntimeLibcall; +def OPENDIR : RuntimeLibcall; +def PCLOSE : RuntimeLibcall; +def PERROR : RuntimeLibcall; +def POPEN : RuntimeLibcall; +def POSIX_MEMALIGN : RuntimeLibcall; +def PREAD : RuntimeLibcall; +def PRINTF : RuntimeLibcall; +def PUTC : RuntimeLibcall; +def PUTCHAR : RuntimeLibcall; +def PUTCHAR_UNLOCKED : RuntimeLibcall; +def PUTC_UNLOCKED : RuntimeLibcall; +def PUTS : RuntimeLibcall; +def PVALLOC : RuntimeLibcall; +def PWRITE : RuntimeLibcall; +def QSORT : RuntimeLibcall; +def READ : RuntimeLibcall; +def READLINK : RuntimeLibcall; +def REALLOC : RuntimeLibcall; +def REALLOCARRAY : RuntimeLibcall; +def REALLOCF : RuntimeLibcall; +def REALPATH : RuntimeLibcall; +def REMOVE : RuntimeLibcall; +def RENAME : RuntimeLibcall; +def REWIND : RuntimeLibcall; +def RMDIR : RuntimeLibcall; +def SCANF : RuntimeLibcall; +def SETBUF : RuntimeLibcall; +def SETITIMER : RuntimeLibcall; +def SETVBUF : RuntimeLibcall; +def SIPRINTF : RuntimeLibcall; +def SIZE_RETURNING_NEW : RuntimeLibcall; +def SIZE_RETURNING_NEW_ALIGNED : RuntimeLibcall; +def SIZE_RETURNING_NEW_ALIGNED_HOT_COLD : RuntimeLibcall; +def SIZE_RETURNING_NEW_HOT_COLD : RuntimeLibcall; +def SMALL_FPRINTF : RuntimeLibcall; +def SMALL_PRINTF : RuntimeLibcall; +def SMALL_SPRINTF : RuntimeLibcall; +def SNPRINTF : RuntimeLibcall; +def SNPRINTF_CHK : RuntimeLibcall; +def SPRINTF : RuntimeLibcall; +def SPRINTF_CHK : RuntimeLibcall; +def SSCANF : RuntimeLibcall; +def STAT : RuntimeLibcall; +def STAT64 : RuntimeLibcall; +def STATVFS : RuntimeLibcall; +def STATVFS64 : RuntimeLibcall; +def STPCPY : RuntimeLibcall; +def STPCPY_CHK : RuntimeLibcall; +def STPNCPY : RuntimeLibcall; +def STPNCPY_CHK : RuntimeLibcall; +def STRCASECMP : RuntimeLibcall; +def STRCAT : RuntimeLibcall; +def STRCAT_CHK : RuntimeLibcall; +def STRCHR : RuntimeLibcall; +def STRCMP : RuntimeLibcall; +def STRCOLL : RuntimeLibcall; +def STRCPY : RuntimeLibcall; +def STRCPY_CHK : RuntimeLibcall; +def STRCSPN : RuntimeLibcall; +def STRDUP : RuntimeLibcall; +def STRLCAT : RuntimeLibcall; +def STRLCAT_CHK : RuntimeLibcall; +def STRLCPY : RuntimeLibcall; +def STRLCPY_CHK : RuntimeLibcall; +def STRLEN_CHK : RuntimeLibcall; +def STRNCASECMP : RuntimeLibcall; +def STRNCAT : RuntimeLibcall; +def STRNCAT_CHK : RuntimeLibcall; +def STRNCMP : RuntimeLibcall; +def STRNCPY : RuntimeLibcall; +def STRNCPY_CHK : RuntimeLibcall; +def STRNDUP : RuntimeLibcall; +def STRNLEN : RuntimeLibcall; +def STRPBRK : RuntimeLibcall; +def STRRCHR : RuntimeLibcall; +def STRSPN : RuntimeLibcall; +def STRSTR : RuntimeLibcall; +def STRTOD : RuntimeLibcall; +def STRTOF : RuntimeLibcall; +def STRTOK : RuntimeLibcall; +def STRTOK_R : RuntimeLibcall; +def STRTOL : RuntimeLibcall; +def STRTOLD : RuntimeLibcall; +def STRTOLL : RuntimeLibcall; +def STRTOUL : RuntimeLibcall; +def STRTOULL : RuntimeLibcall; +def STRXFRM : RuntimeLibcall; +def SYSTEM : RuntimeLibcall; +def TERMINATE : RuntimeLibcall; +def TIMES : RuntimeLibcall; +def TMPFILE : RuntimeLibcall; +def TMPFILE64 : RuntimeLibcall; +def TOASCII : RuntimeLibcall; +def UNAME : RuntimeLibcall; +def UNDER_IO_GETC : RuntimeLibcall; +def UNDER_IO_PUTC : RuntimeLibcall; +def UNGETC : RuntimeLibcall; +def UNLINK : RuntimeLibcall; +def UNSETENV : RuntimeLibcall; +def UTIME : RuntimeLibcall; +def UTIMES : RuntimeLibcall; +def VALLOC : RuntimeLibcall; +def VEC_CALLOC : RuntimeLibcall; +def VEC_FREE : RuntimeLibcall; +def VEC_MALLOC : RuntimeLibcall; +def VEC_REALLOC : RuntimeLibcall; +def VFPRINTF : RuntimeLibcall; +def VFSCANF : RuntimeLibcall; +def VPRINTF : RuntimeLibcall; +def VSCANF : RuntimeLibcall; +def VSNPRINTF : RuntimeLibcall; +def VSNPRINTF_CHK : RuntimeLibcall; +def VSPRINTF : RuntimeLibcall; +def VSPRINTF_CHK : RuntimeLibcall; +def VSSCANF : RuntimeLibcall; +def WCSLEN : RuntimeLibcall; +def WRITE : RuntimeLibcall; +def ZDAPV : RuntimeLibcall; +def ZDAPVJ : RuntimeLibcall; +def ZDAPVJST11ALIGN_VAL_T : RuntimeLibcall; +def ZDAPVM : RuntimeLibcall; +def ZDAPVMST11ALIGN_VAL_T : RuntimeLibcall; +def ZDAPVRKST9NOTHROW_T : RuntimeLibcall; +def ZDAPVST11ALIGN_VAL_T : RuntimeLibcall; +def ZDAPVST11ALIGN_VAL_TRKST9NOTHROW_T : RuntimeLibcall; +def ZDLPV : RuntimeLibcall; +def ZDLPVJ : RuntimeLibcall; +def ZDLPVJST11ALIGN_VAL_T : RuntimeLibcall; +def ZDLPVM : RuntimeLibcall; +def ZDLPVMST11ALIGN_VAL_T : RuntimeLibcall; +def ZDLPVRKST9NOTHROW_T : RuntimeLibcall; +def ZDLPVST11ALIGN_VAL_T : RuntimeLibcall; +def ZDLPVST11ALIGN_VAL_TRKST9NOTHROW_T : RuntimeLibcall; +def ZNAJ : RuntimeLibcall; +def ZNAJRKST9NOTHROW_T : RuntimeLibcall; +def ZNAJST11ALIGN_VAL_T : RuntimeLibcall; +def ZNAJST11ALIGN_VAL_TRKST9NOTHROW_T : RuntimeLibcall; +def ZNAM : RuntimeLibcall; +def ZNAM12__HOT_COLD_T : RuntimeLibcall; +def ZNAMRKST9NOTHROW_T : RuntimeLibcall; +def ZNAMRKST9NOTHROW_T12__HOT_COLD_T : RuntimeLibcall; +def ZNAMST11ALIGN_VAL_T : RuntimeLibcall; +def ZNAMST11ALIGN_VAL_T12__HOT_COLD_T : RuntimeLibcall; +def ZNAMST11ALIGN_VAL_TRKST9NOTHROW_T : RuntimeLibcall; +def ZNAMST11ALIGN_VAL_TRKST9NOTHROW_T12__HOT_COLD_T : RuntimeLibcall; +def ZNWJ : RuntimeLibcall; +def ZNWJRKST9NOTHROW_T : RuntimeLibcall; +def ZNWJST11ALIGN_VAL_T : RuntimeLibcall; +def ZNWJST11ALIGN_VAL_TRKST9NOTHROW_T : RuntimeLibcall; +def ZNWM : RuntimeLibcall; +def ZNWM12__HOT_COLD_T : RuntimeLibcall; +def ZNWMRKST9NOTHROW_T : RuntimeLibcall; +def ZNWMRKST9NOTHROW_T12__HOT_COLD_T : RuntimeLibcall; +def ZNWMST11ALIGN_VAL_T : RuntimeLibcall; +def ZNWMST11ALIGN_VAL_T12__HOT_COLD_T : RuntimeLibcall; +def ZNWMST11ALIGN_VAL_TRKST9NOTHROW_T : RuntimeLibcall; +def ZNWMST11ALIGN_VAL_TRKST9NOTHROW_T12__HOT_COLD_T : RuntimeLibcall; +def KMPC_ALLOC_SHARED : RuntimeLibcall; +def KMPC_FREE_SHARED : RuntimeLibcall; + //-------------------------------------------------------------------- // Global variable references //-------------------------------------------------------------------- @@ -1101,8 +1402,11 @@ def __memcpy_chk : RuntimeLibcallImpl; def __memmove_chk : RuntimeLibcallImpl; def __memset_chk : RuntimeLibcallImpl; +def malloc : RuntimeLibcallImpl; + // DSEPass can emit calloc if it finds a pair of malloc/memset def calloc : RuntimeLibcallImpl; +def free : RuntimeLibcallImpl; } // End let IsDefault = true @@ -1115,6 +1419,353 @@ def exp10l_ppcf128 : RuntimeLibcallImpl; // Stack Protector Fail def __stack_chk_fail : RuntimeLibcallImpl; +//-------------------------------------------------------------------- +// Other functions from TargetLibraryInfo +// +// TODO: These need to be organized by library and added to relevant +// systems. +/// +// -------------------------------------------------------------------- + +def __2_YAPAXI_Z : RuntimeLibcallImpl; +def __2_YAPAXIABUnothrow_t_std___Z + : RuntimeLibcallImpl; +def __2_YAPEAX_K_Z : RuntimeLibcallImpl; +def __2_YAPEAX_KAEBUnothrow_t_std___Z + : RuntimeLibcallImpl; +def __3_YAXPAX_Z : RuntimeLibcallImpl; +def __3_YAXPAXABUnothrow_t_std___Z + : RuntimeLibcallImpl; +def __3_YAXPAXI_Z : RuntimeLibcallImpl; +def __3_YAXPEAX_Z : RuntimeLibcallImpl; +def __3_YAXPEAXAEBUnothrow_t_std___Z + : RuntimeLibcallImpl; +def __3_YAXPEAX_K_Z + : RuntimeLibcallImpl; +def ___U_YAPAXI_Z : RuntimeLibcallImpl; +def ___U_YAPAXIABUnothrow_t_std___Z + : RuntimeLibcallImpl; +def ___U_YAPEAX_K_Z + : RuntimeLibcallImpl; +def ___U_YAPEAX_KAEBUnothrow_t_std___Z + : RuntimeLibcallImpl; +def ___V_YAXPAX_Z + : RuntimeLibcallImpl; +def ___V_YAXPAXABUnothrow_t_std___Z + : RuntimeLibcallImpl; +def ___V_YAXPAXI_Z + : RuntimeLibcallImpl; +def ___V_YAXPEAX_Z + : RuntimeLibcallImpl; +def ___V_YAXPEAXAEBUnothrow_t_std___Z + : RuntimeLibcallImpl; +def ___V_YAXPEAX_K_Z + : RuntimeLibcallImpl; +def _IO_getc : RuntimeLibcallImpl; +def _IO_putc : RuntimeLibcallImpl; +def _ZdaPv : RuntimeLibcallImpl; +def _ZdaPvRKSt9nothrow_t : RuntimeLibcallImpl; +def _ZdaPvSt11align_val_t : RuntimeLibcallImpl; +def _ZdaPvSt11align_val_tRKSt9nothrow_t + : RuntimeLibcallImpl; +def _ZdaPvj : RuntimeLibcallImpl; +def _ZdaPvjSt11align_val_t : RuntimeLibcallImpl; +def _ZdaPvm : RuntimeLibcallImpl; +def _ZdaPvmSt11align_val_t : RuntimeLibcallImpl; +def _ZdlPv : RuntimeLibcallImpl; +def _ZdlPvRKSt9nothrow_t : RuntimeLibcallImpl; +def _ZdlPvSt11align_val_t : RuntimeLibcallImpl; +def _ZdlPvSt11align_val_tRKSt9nothrow_t + : RuntimeLibcallImpl; +def _ZdlPvj : RuntimeLibcallImpl; +def _ZdlPvjSt11align_val_t : RuntimeLibcallImpl; +def _ZdlPvm : RuntimeLibcallImpl; +def _ZdlPvmSt11align_val_t : RuntimeLibcallImpl; +def _Znaj : RuntimeLibcallImpl; +def _ZnajRKSt9nothrow_t : RuntimeLibcallImpl; +def _ZnajSt11align_val_t : RuntimeLibcallImpl; +def _ZnajSt11align_val_tRKSt9nothrow_t + : RuntimeLibcallImpl; +def _Znam : RuntimeLibcallImpl; +def _Znam12__hot_cold_t : RuntimeLibcallImpl; +def _ZnamRKSt9nothrow_t : RuntimeLibcallImpl; +def _ZnamRKSt9nothrow_t12__hot_cold_t + : RuntimeLibcallImpl; +def _ZnamSt11align_val_t : RuntimeLibcallImpl; +def _ZnamSt11align_val_t12__hot_cold_t + : RuntimeLibcallImpl; +def _ZnamSt11align_val_tRKSt9nothrow_t + : RuntimeLibcallImpl; +def _ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t + : RuntimeLibcallImpl; +def _Znwj : RuntimeLibcallImpl; +def _ZnwjRKSt9nothrow_t : RuntimeLibcallImpl; +def _ZnwjSt11align_val_t : RuntimeLibcallImpl; +def _ZnwjSt11align_val_tRKSt9nothrow_t + : RuntimeLibcallImpl; +def _Znwm : RuntimeLibcallImpl; +def _Znwm12__hot_cold_t : RuntimeLibcallImpl; +def _ZnwmRKSt9nothrow_t : RuntimeLibcallImpl; +def _ZnwmRKSt9nothrow_t12__hot_cold_t + : RuntimeLibcallImpl; +def _ZnwmSt11align_val_t : RuntimeLibcallImpl; +def _ZnwmSt11align_val_t12__hot_cold_t + : RuntimeLibcallImpl; +def _ZnwmSt11align_val_tRKSt9nothrow_t + : RuntimeLibcallImpl; +def _ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t + : RuntimeLibcallImpl; +def __size_returning_new : RuntimeLibcallImpl; +def __size_returning_new_hot_cold + : RuntimeLibcallImpl; +def __size_returning_new_aligned + : RuntimeLibcallImpl; +def __size_returning_new_aligned_hot_cold + : RuntimeLibcallImpl; +def __cxa_atexit : RuntimeLibcallImpl; +def atexit : RuntimeLibcallImpl; +def abort : RuntimeLibcallImpl; +def exit : RuntimeLibcallImpl; +def _Exit : RuntimeLibcallImpl; +def _ZSt9terminatev : RuntimeLibcallImpl; +def __cxa_throw : RuntimeLibcallImpl; +def __cxa_guard_abort : RuntimeLibcallImpl; +def __cxa_guard_acquire : RuntimeLibcallImpl; +def __cxa_guard_release : RuntimeLibcallImpl; +def __isoc99_scanf : RuntimeLibcallImpl; +def __isoc99_sscanf : RuntimeLibcallImpl; +def __kmpc_alloc_shared : RuntimeLibcallImpl; +def __kmpc_free_shared : RuntimeLibcallImpl; +def __memccpy_chk : RuntimeLibcallImpl; +def __mempcpy_chk : RuntimeLibcallImpl; +def __small_fprintf : RuntimeLibcallImpl; +def __small_printf : RuntimeLibcallImpl; +def __small_sprintf : RuntimeLibcallImpl; +def __snprintf_chk : RuntimeLibcallImpl; +def __sprintf_chk : RuntimeLibcallImpl; +def __stpcpy_chk : RuntimeLibcallImpl; +def __stpncpy_chk : RuntimeLibcallImpl; +def __strcat_chk : RuntimeLibcallImpl; +def __strcpy_chk : RuntimeLibcallImpl; +def __strdup : RuntimeLibcallImpl; +def __strlcat_chk : RuntimeLibcallImpl; +def __strlcpy_chk : RuntimeLibcallImpl; +def __strlen_chk : RuntimeLibcallImpl; +def __strncat_chk : RuntimeLibcallImpl; +def __strncpy_chk : RuntimeLibcallImpl; +def __strndup : RuntimeLibcallImpl; +def __strtok_r : RuntimeLibcallImpl; +def __vsnprintf_chk : RuntimeLibcallImpl; +def __vsprintf_chk : RuntimeLibcallImpl; +def abs : RuntimeLibcallImpl; +def access : RuntimeLibcallImpl; +def aligned_alloc : RuntimeLibcallImpl; +def atof : RuntimeLibcallImpl; +def atoi : RuntimeLibcallImpl; +def atol : RuntimeLibcallImpl; +def atoll : RuntimeLibcallImpl; +def bcmp : RuntimeLibcallImpl; +def bcopy : RuntimeLibcallImpl; +def cabs : RuntimeLibcallImpl; +def cabsf : RuntimeLibcallImpl; +defm cabsl : LibmLongDoubleLibCall; +def chmod : RuntimeLibcallImpl; +def chown : RuntimeLibcallImpl; +def clearerr : RuntimeLibcallImpl; +def closedir : RuntimeLibcallImpl; +def ctermid : RuntimeLibcallImpl; +def execl : RuntimeLibcallImpl; +def execle : RuntimeLibcallImpl; +def execlp : RuntimeLibcallImpl; +def execv : RuntimeLibcallImpl; +def execvP : RuntimeLibcallImpl; +def execve : RuntimeLibcallImpl; +def execvp : RuntimeLibcallImpl; +def execvpe : RuntimeLibcallImpl; +def fclose : RuntimeLibcallImpl; +def fdopen : RuntimeLibcallImpl; +def feof : RuntimeLibcallImpl; +def ferror : RuntimeLibcallImpl; +def fflush : RuntimeLibcallImpl; +def ffs : RuntimeLibcallImpl; +def ffsl : RuntimeLibcallImpl; +def ffsll : RuntimeLibcallImpl; +def fgetc : RuntimeLibcallImpl; +def fgetc_unlocked : RuntimeLibcallImpl; +def fgetpos : RuntimeLibcallImpl; +def fgets : RuntimeLibcallImpl; +def fgets_unlocked : RuntimeLibcallImpl; +def fileno : RuntimeLibcallImpl; +def fiprintf : RuntimeLibcallImpl; +def flockfile : RuntimeLibcallImpl; +def fls : RuntimeLibcallImpl; +def flsl : RuntimeLibcallImpl; +def flsll : RuntimeLibcallImpl; +def fopen : RuntimeLibcallImpl; +def fopen64 : RuntimeLibcallImpl; +def fork : RuntimeLibcallImpl; +def fprintf : RuntimeLibcallImpl; +def fputc : RuntimeLibcallImpl; +def fputc_unlocked : RuntimeLibcallImpl; +def fputs : RuntimeLibcallImpl; +def fputs_unlocked : RuntimeLibcallImpl; +def fread : RuntimeLibcallImpl; +def fread_unlocked : RuntimeLibcallImpl; +def fscanf : RuntimeLibcallImpl; +def fseek : RuntimeLibcallImpl; +def fseeko : RuntimeLibcallImpl; +def fseeko64 : RuntimeLibcallImpl; +def fsetpos : RuntimeLibcallImpl; +def fstat : RuntimeLibcallImpl; +def fstat64 : RuntimeLibcallImpl; +def fstatvfs : RuntimeLibcallImpl; +def fstatvfs64 : RuntimeLibcallImpl; +def ftell : RuntimeLibcallImpl; +def ftello : RuntimeLibcallImpl; +def ftello64 : RuntimeLibcallImpl; +def ftrylockfile : RuntimeLibcallImpl; +def funlockfile : RuntimeLibcallImpl; +def fwrite : RuntimeLibcallImpl; +def fwrite_unlocked : RuntimeLibcallImpl; +def getc : RuntimeLibcallImpl; +def getc_unlocked : RuntimeLibcallImpl; +def getchar : RuntimeLibcallImpl; +def getchar_unlocked : RuntimeLibcallImpl; +def getenv : RuntimeLibcallImpl; +def getitimer : RuntimeLibcallImpl; +def getlogin_r : RuntimeLibcallImpl; +def getpwnam : RuntimeLibcallImpl; +def gets : RuntimeLibcallImpl; +def gettimeofday : RuntimeLibcallImpl; +def htonl : RuntimeLibcallImpl; +def htons : RuntimeLibcallImpl; +def iprintf : RuntimeLibcallImpl; +def isascii : RuntimeLibcallImpl; +def isdigit : RuntimeLibcallImpl; +def labs : RuntimeLibcallImpl; +def lchown : RuntimeLibcallImpl; +def llabs : RuntimeLibcallImpl; +def lstat : RuntimeLibcallImpl; +def lstat64 : RuntimeLibcallImpl; +def memalign : RuntimeLibcallImpl; +def memccpy : RuntimeLibcallImpl; +def memchr : RuntimeLibcallImpl; +def memcmp : RuntimeLibcallImpl; +def mempcpy : RuntimeLibcallImpl; +def memrchr : RuntimeLibcallImpl; +def memset_pattern16 : RuntimeLibcallImpl; +def memset_pattern4 : RuntimeLibcallImpl; +def memset_pattern8 : RuntimeLibcallImpl; +def mkdir : RuntimeLibcallImpl; +def mktime : RuntimeLibcallImpl; +def ntohl : RuntimeLibcallImpl; +def ntohs : RuntimeLibcallImpl; +def open : RuntimeLibcallImpl; +def open64 : RuntimeLibcallImpl; +def opendir : RuntimeLibcallImpl; +def pclose : RuntimeLibcallImpl; +def perror : RuntimeLibcallImpl; +def popen : RuntimeLibcallImpl; +def posix_memalign : RuntimeLibcallImpl; +def pread : RuntimeLibcallImpl; +def printf : RuntimeLibcallImpl; +def putc : RuntimeLibcallImpl; +def putc_unlocked : RuntimeLibcallImpl; +def putchar : RuntimeLibcallImpl; +def putchar_unlocked : RuntimeLibcallImpl; +def puts : RuntimeLibcallImpl; +def pvalloc : RuntimeLibcallImpl; +def pwrite : RuntimeLibcallImpl; +def qsort : RuntimeLibcallImpl; +def read : RuntimeLibcallImpl; +def readlink : RuntimeLibcallImpl; +def realloc : RuntimeLibcallImpl; +def reallocf : RuntimeLibcallImpl; +def reallocarray : RuntimeLibcallImpl; +def realpath : RuntimeLibcallImpl; +def remove : RuntimeLibcallImpl; +def rename : RuntimeLibcallImpl; +def rewind : RuntimeLibcallImpl; +def rmdir : RuntimeLibcallImpl; +def scanf : RuntimeLibcallImpl; +def setbuf : RuntimeLibcallImpl; +def setitimer : RuntimeLibcallImpl; +def setvbuf : RuntimeLibcallImpl; +def siprintf : RuntimeLibcallImpl; +def snprintf : RuntimeLibcallImpl; +def sprintf : RuntimeLibcallImpl; +def sscanf : RuntimeLibcallImpl; +def stat : RuntimeLibcallImpl; +def stat64 : RuntimeLibcallImpl; +def statvfs : RuntimeLibcallImpl; +def statvfs64 : RuntimeLibcallImpl; +def stpcpy : RuntimeLibcallImpl; +def stpncpy : RuntimeLibcallImpl; +def strcasecmp : RuntimeLibcallImpl; +def strcat : RuntimeLibcallImpl; +def strchr : RuntimeLibcallImpl; +def strcmp : RuntimeLibcallImpl; +def strcoll : RuntimeLibcallImpl; +def strcpy : RuntimeLibcallImpl; +def strcspn : RuntimeLibcallImpl; +def strdup : RuntimeLibcallImpl; +def strlcat : RuntimeLibcallImpl; +def strlcpy : RuntimeLibcallImpl; +def strlen : RuntimeLibcallImpl; +def strncasecmp : RuntimeLibcallImpl; +def strncat : RuntimeLibcallImpl; +def strncmp : RuntimeLibcallImpl; +def strncpy : RuntimeLibcallImpl; +def strndup : RuntimeLibcallImpl; +def strnlen : RuntimeLibcallImpl; +def strpbrk : RuntimeLibcallImpl; +def strrchr : RuntimeLibcallImpl; +def strspn : RuntimeLibcallImpl; +def strstr : RuntimeLibcallImpl; +def strtod : RuntimeLibcallImpl; +def strtof : RuntimeLibcallImpl; +def strtok : RuntimeLibcallImpl; +def strtok_r : RuntimeLibcallImpl; +def strtol : RuntimeLibcallImpl; +def strtold : RuntimeLibcallImpl; +def strtoll : RuntimeLibcallImpl; +def strtoul : RuntimeLibcallImpl; +def strtoull : RuntimeLibcallImpl; +def strxfrm : RuntimeLibcallImpl; +def system : RuntimeLibcallImpl; +def times : RuntimeLibcallImpl; +def tmpfile : RuntimeLibcallImpl; +def tmpfile64 : RuntimeLibcallImpl; +def toascii : RuntimeLibcallImpl; +def uname : RuntimeLibcallImpl; +def ungetc : RuntimeLibcallImpl; +def unlink : RuntimeLibcallImpl; +def unsetenv : RuntimeLibcallImpl; +def utime : RuntimeLibcallImpl; +def utimes : RuntimeLibcallImpl; +def valloc : RuntimeLibcallImpl; +def vec_calloc : RuntimeLibcallImpl; +def vec_free : RuntimeLibcallImpl; +def vec_malloc : RuntimeLibcallImpl; +def vec_realloc : RuntimeLibcallImpl; +def vfprintf : RuntimeLibcallImpl; +def vfscanf : RuntimeLibcallImpl; +def vprintf : RuntimeLibcallImpl; +def vscanf : RuntimeLibcallImpl; +def vsnprintf : RuntimeLibcallImpl; +def vsprintf : RuntimeLibcallImpl; +def vsscanf : RuntimeLibcallImpl; +def wcslen : RuntimeLibcallImpl; +def write : RuntimeLibcallImpl; + //-------------------------------------------------------------------- // compiler-rt/libgcc but 64-bit only, not available by default //-------------------------------------------------------------------- @@ -1326,6 +1977,11 @@ defvar DarwinSinCosStret = LibcallImpls<(add __sincosf_stret, __sincos_stret, darwinHasSinCosStret>; defvar DarwinExp10 = LibcallImpls<(add __exp10f, __exp10), darwinHasExp10>; +defvar DarwinMemsetPattern = LibcallImpls<(add memset_pattern4, + memset_pattern8, + memset_pattern16), + darwinHasMemsetPattern>; + defvar SecurityCheckCookieIfWinMSVC = LibcallImpls<(add __security_check_cookie, __security_cookie), isWindowsMSVCOrItaniumEnvironment>; @@ -1483,7 +2139,7 @@ def AArch64SystemLibrary : SystemRuntimeLibrary< AArch64LibcallImpls, LibcallImpls<(add Int128RTLibcalls), isAArch64_ILP64>, LibcallImpls<(add bzero), isOSDarwin>, - DarwinExp10, DarwinSinCosStret, + DarwinExp10, DarwinSinCosStret, DarwinMemsetPattern, LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128, DefaultLibmExp10, DefaultStackProtector, @@ -1953,7 +2609,7 @@ def ARMSystemLibrary WindowARMFPIntCasts, SecurityCheckCookieIfWinMSVC, AEABIDivRemCalls, - DarwinSinCosStret, DarwinExp10, + DarwinSinCosStret, DarwinExp10, DarwinMemsetPattern, LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128, DefaultLibmExp10, @@ -2638,7 +3294,7 @@ defvar MemChkLibcalls = [__memcpy_chk, __memset_chk, __memmove_chk]; defvar X86CommonLibcalls = (add (sub WinDefaultLibcallImpls, WindowsDivRemMulLibcallOverrides, MemChkLibcalls), - DarwinSinCosStret, DarwinExp10, + DarwinSinCosStret, DarwinExp10, DarwinMemsetPattern, X86_F128_Libcalls, LibmHasSinCosF80, // FIXME: Depends on long double SinCosF32F64Libcalls, diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h index c2f15b81da02c..5722213347d51 100644 --- a/llvm/include/llvm/MC/MCInstrDesc.h +++ b/llvm/include/llvm/MC/MCInstrDesc.h @@ -49,8 +49,7 @@ enum OperandConstraint { /// private, all access should go through the MCOperandInfo accessors. /// See the accessors for a description of what these are. enum OperandFlags { - LookupPtrRegClass = 0, - LookupRegClassByHwMode, + LookupRegClassByHwMode = 0, Predicate, OptionalDef, BranchTarget @@ -90,9 +89,6 @@ class MCOperandInfo { /// operand is a register. If LookupRegClassByHwMode is set, then this is an /// index into a table in TargetInstrInfo or MCInstrInfo which contains the /// real register class ID. - /// - /// If isLookupPtrRegClass is set, then this is an index that is passed to - /// TargetRegisterInfo::getPointerRegClass(x) to get a dynamic register class. int16_t RegClass; /// These are flags from the MCOI::OperandFlags enum. @@ -104,13 +100,6 @@ class MCOperandInfo { /// Operand constraints (see OperandConstraint enum). uint16_t Constraints; - /// Set if this operand is a pointer value and it requires a callback - /// to look up its register class. - // TODO: Deprecated in favor of isLookupRegClassByHwMode - bool isLookupPtrRegClass() const { - return Flags & (1 << MCOI::LookupPtrRegClass); - } - /// Set if this operand is a value that requires the current hwmode to look up /// its register class. bool isLookupRegClassByHwMode() const { diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h index adfdccdb5ab77..168131b43cca8 100644 --- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -22,6 +22,7 @@ namespace llvm { class MCTargetOptions; enum class EmitDwarfUnwindType; +class StringRef; namespace mc { @@ -62,9 +63,9 @@ LLVM_ABI bool getX86RelaxRelocations(); LLVM_ABI bool getX86Sse2Avx(); -LLVM_ABI std::string getABIName(); +LLVM_ABI StringRef getABIName(); -LLVM_ABI std::string getAsSecureLogFile(); +LLVM_ABI StringRef getAsSecureLogFile(); /// Create this object with static storage to register mc-related command /// line options. diff --git a/llvm/include/llvm/Support/AllocToken.h b/llvm/include/llvm/Support/AllocToken.h index e40d8163a9d7c..1dc3a0cacef24 100644 --- a/llvm/include/llvm/Support/AllocToken.h +++ b/llvm/include/llvm/Support/AllocToken.h @@ -46,6 +46,9 @@ inline constexpr AllocTokenMode DefaultAllocTokenMode = LLVM_ABI std::optional getAllocTokenModeFromString(StringRef Name); +/// Returns the canonical string name for the given AllocTokenMode. +LLVM_ABI StringRef getAllocTokenModeAsString(AllocTokenMode Mode); + /// Metadata about an allocation used to generate a token ID. struct AllocTokenMetadata { SmallString<64> TypeName; diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index 6abde996e6dc8..96a7d7c2091d2 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -918,16 +918,23 @@ def slice; def encoder; def decoder; -/// PointerLikeRegClass - Values that are designed to have pointer width are -/// derived from this. TableGen treats the register class as having a symbolic -/// type that it doesn't know, and resolves the actual regclass to use by using -/// the TargetRegisterInfo::getPointerRegClass() hook at codegen time. -/// -/// This is deprecated in favor of RegClassByHwMode. +/// PointerLikeRegClass - Pseudoinstruction operands that are designed +/// to have pointer width are derived from this. This should only be +/// used by StandardPseudoInstruction instructions. No target specific +/// instruction should use this. class PointerLikeRegClass { int RegClassKind = Kind; } +/// ptr_rc definition - Mark this operand as being a pointer value +/// whose register class needs to be defined by the target. Targets +/// should provide instruction definition overrides which substitute +/// the uses of this with the backend defined RegisterClass or +/// RegClassByHwMode to use for pointer virtual registers for a +/// particular opcode (typically by defining a subsitute instruction +/// with RemapPointerOperands). +def ptr_rc : PointerLikeRegClass<0>; + /// RegClassByHwMode - Operands that change the register class based /// on the subtarget are derived from this. TableGen /// treats the register class as having a symbolic kind that it @@ -941,13 +948,6 @@ class RegClassByHwMode Modes, list Objects = RegClasses; } -/// ptr_rc definition - Mark this operand as being a pointer value whose -/// register class is resolved dynamically via a callback to TargetInstrInfo. -/// FIXME: We should probably change this to a class which contain a list of -/// flags. But currently we have but one flag. -// Deprecated, use RegClassByHwMode instead. -def ptr_rc : PointerLikeRegClass<0>; - /// unknown definition - Mark this operand as being of unknown type, causing /// it to be resolved by inference in the context it is used. class unknown_class; diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp index 4064b25d9d4e7..8a8c2277012ec 100644 --- a/llvm/lib/Analysis/Delinearization.cpp +++ b/llvm/lib/Analysis/Delinearization.cpp @@ -704,44 +704,6 @@ bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE, return !Subscripts.empty(); } -bool llvm::tryDelinearizeFixedSizeImpl( - ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn, - SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes) { - Value *SrcPtr = getLoadStorePointerOperand(Inst); - - // Check the simple case where the array dimensions are fixed size. - auto *SrcGEP = dyn_cast(SrcPtr); - if (!SrcGEP) - return false; - - getIndexExpressionsFromGEP(*SE, SrcGEP, Subscripts, Sizes); - - // Check that the two size arrays are non-empty and equal in length and - // value. - // TODO: it would be better to let the caller to clear Subscripts, similar - // to how we handle Sizes. - if (Sizes.empty() || Subscripts.size() <= 1) { - Subscripts.clear(); - return false; - } - - // Check that for identical base pointers we do not miss index offsets - // that have been added before this GEP is applied. - Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts(); - const SCEVUnknown *SrcBase = - dyn_cast(SE->getPointerBase(AccessFn)); - if (!SrcBase || SrcBasePtr != SrcBase->getValue()) { - Subscripts.clear(); - return false; - } - - assert(Subscripts.size() == Sizes.size() + 1 && - "Expected equal number of entries in the list of size and " - "subscript."); - - return true; -} - namespace { void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI, diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 44ee7f63cbc8e..cd2be1652bd97 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -516,10 +516,12 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, assert(End->isDefined() && "Invalid end label"); addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); - if (DD->getDwarfVersion() < 4) - addLabelAddress(D, dwarf::DW_AT_high_pc, End); - else + if (DD->getDwarfVersion() >= 4 && + (!isDwoUnit() || !llvm::isRangeRelaxable(Begin, End))) { addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); + return; + } + addLabelAddress(D, dwarf::DW_AT_high_pc, End); } // Add info for Wasm-global-based relocation. diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index d503d7a2345fd..fef3a3663d3a8 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -66,10 +66,6 @@ const TargetRegisterClass *TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, const MCOperandInfo &OpInfo = MCID.operands()[OpNum]; int16_t RegClass = getOpRegClassID(OpInfo); - // TODO: Remove isLookupPtrRegClass in favor of isLookupRegClassByHwMode - if (OpInfo.isLookupPtrRegClass()) - return TRI.getPointerRegClass(RegClass); - // Instructions like INSERT_SUBREG do not have fixed register classes. if (RegClass < 0) return nullptr; diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index cbe7a7b9f77f4..a5f842a5fb520 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -130,13 +130,23 @@ bool RuntimeLibcallsInfo::darwinHasExp10(const Triple &TT) { } } +/// TODO: There is really no guarantee that sizeof(size_t) is equal to the index +/// size of the default address space. This matches TargetLibraryInfo and should +/// be kept in sync. +static IntegerType *getSizeTType(LLVMContext &Ctx, const DataLayout &DL) { + return DL.getIndexType(Ctx, /*AddressSpace=*/0); +} + std::pair RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, const DataLayout &DL, RTLIB::LibcallImpl LibcallImpl) const { + // TODO: NoCallback probably unsafe in general static constexpr Attribute::AttrKind CommonFnAttrs[] = { Attribute::MustProgress, Attribute::NoCallback, Attribute::NoFree, Attribute::NoSync, Attribute::NoUnwind, Attribute::WillReturn}; + static constexpr Attribute::AttrKind MemoryFnAttrs[] = { + Attribute::MustProgress, Attribute::NoUnwind, Attribute::WillReturn}; static constexpr Attribute::AttrKind CommonPtrArgAttrs[] = { Attribute::NoAlias, Attribute::WriteOnly, Attribute::NonNull}; @@ -182,6 +192,71 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, return {FunctionType::get(RetTy, {ScalarTy}, false), Attrs}; } + case RTLIB::impl_malloc: + case RTLIB::impl_calloc: { + AttrBuilder FuncAttrBuilder(Ctx); + for (Attribute::AttrKind Attr : MemoryFnAttrs) + FuncAttrBuilder.addAttribute(Attr); + FuncAttrBuilder.addAttribute(Attribute::NoFree); + + AllocFnKind AllocKind = AllocFnKind::Alloc; + if (LibcallImpl == RTLIB::impl_malloc) + AllocKind |= AllocFnKind::Uninitialized; + + // TODO: Set memory attribute + FuncAttrBuilder.addAllocKindAttr(AllocKind); + FuncAttrBuilder.addAttribute("alloc-family", "malloc"); + FuncAttrBuilder.addAllocSizeAttr(0, LibcallImpl == RTLIB::impl_malloc + ? std::nullopt + : std::make_optional(1)); + + AttributeList Attrs; + Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder); + + { + AttrBuilder ArgAttrBuilder(Ctx); + for (Attribute::AttrKind AK : CommonPtrArgAttrs) + ArgAttrBuilder.addAttribute(AK); + + Attrs = Attrs.addRetAttribute(Ctx, Attribute::NoUndef); + Attrs = Attrs.addRetAttribute(Ctx, Attribute::NoAlias); + Attrs = Attrs.addParamAttribute(Ctx, 0, Attribute::NoUndef); + if (LibcallImpl == RTLIB::impl_calloc) + Attrs = Attrs.addParamAttribute(Ctx, 1, Attribute::NoUndef); + } + + IntegerType *SizeT = getSizeTType(Ctx, DL); + PointerType *PtrTy = PointerType::get(Ctx, 0); + SmallVector ArgTys = {SizeT}; + if (LibcallImpl == RTLIB::impl_calloc) + ArgTys.push_back(SizeT); + + return {FunctionType::get(PtrTy, ArgTys, false), Attrs}; + } + case RTLIB::impl_free: { + // TODO: Set memory attribute + AttrBuilder FuncAttrBuilder(Ctx); + for (Attribute::AttrKind Attr : MemoryFnAttrs) + FuncAttrBuilder.addAttribute(Attr); + + FuncAttrBuilder.addAllocKindAttr(AllocFnKind::Free); + FuncAttrBuilder.addAttribute("alloc-family", "malloc"); + + AttributeList Attrs; + Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder); + + { + AttrBuilder ArgAttrBuilder(Ctx); + ArgAttrBuilder.addAttribute(Attribute::NoUndef); + ArgAttrBuilder.addAttribute(Attribute::AllocatedPointer); + ArgAttrBuilder.addCapturesAttr(CaptureInfo::none()); + Attrs = Attrs.addParamAttributes(Ctx, 0, ArgAttrBuilder); + } + + return {FunctionType::get(Type::getVoidTy(Ctx), {PointerType::get(Ctx, 0)}, + false), + Attrs}; + } case RTLIB::impl_sqrtf: case RTLIB::impl_sqrt: { AttrBuilder FuncAttrBuilder(Ctx); diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp index ff95ff78fd53a..22494fa11eb2a 100644 --- a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp +++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp @@ -24,6 +24,13 @@ using namespace llvm; return *NAME##View; \ } +#define MCSTROPT(NAME) \ + static cl::opt *NAME##View; \ + StringRef llvm::mc::get##NAME() { \ + assert(NAME##View && "RegisterMCTargetOptionsFlags not created."); \ + return *NAME##View; \ + } + #define MCOPT_EXP(TY, NAME) \ MCOPT(TY, NAME) \ std::optional llvm::mc::getExplicit##NAME() { \ @@ -52,8 +59,8 @@ MCOPT(bool, Crel) MCOPT(bool, ImplicitMapSyms) MCOPT(bool, X86RelaxRelocations) MCOPT(bool, X86Sse2Avx) -MCOPT(std::string, ABIName) -MCOPT(std::string, AsSecureLogFile) +MCSTROPT(ABIName) +MCSTROPT(AsSecureLogFile) llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() { #define MCBINDOPT(NAME) \ diff --git a/llvm/lib/Support/AllocToken.cpp b/llvm/lib/Support/AllocToken.cpp index daa40d4e9dcc6..cabe52189c4bb 100644 --- a/llvm/lib/Support/AllocToken.cpp +++ b/llvm/lib/Support/AllocToken.cpp @@ -28,6 +28,20 @@ llvm::getAllocTokenModeFromString(StringRef Name) { .Default(std::nullopt); } +StringRef llvm::getAllocTokenModeAsString(AllocTokenMode Mode) { + switch (Mode) { + case AllocTokenMode::Increment: + return "increment"; + case AllocTokenMode::Random: + return "random"; + case AllocTokenMode::TypeHash: + return "typehash"; + case AllocTokenMode::TypeHashPointerSplit: + return "typehashpointersplit"; + } + llvm_unreachable("Unknown AllocTokenMode"); +} + static uint64_t getStableHash(const AllocTokenMetadata &Metadata, uint64_t MaxTokens) { return getStableSipHash(Metadata.TypeName) % MaxTokens; diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 0f457c2cab61b..1a4367b84353b 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -40,6 +40,8 @@ include "AArch64SchedPredExynos.td" include "AArch64SchedPredNeoverse.td" include "AArch64Combine.td" +defm : RemapAllTargetPseudoPointerOperands; + def AArch64InstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 83ce39fa314d1..d4099b56b6d6e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22586,6 +22586,38 @@ static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) { Flags); } +// add(trunc(ashr(A, C)), trunc(lshr(A, BW-1))), with C >= BW +// -> +// X = trunc(ashr(A, C)); add(x, lshr(X, BW-1) +// The original converts into ashr+lshr+xtn+xtn+add. The second becomes +// ashr+xtn+usra. The first form has less total latency due to more parallelism, +// but more micro-ops and seems to be slower in practice. +static SDValue performAddTruncShiftCombine(SDNode *N, SelectionDAG &DAG) { + using namespace llvm::SDPatternMatch; + EVT VT = N->getValueType(0); + if (VT != MVT::v2i32 && VT != MVT::v4i16 && VT != MVT::v8i8) + return SDValue(); + + SDValue AShr, LShr; + if (!sd_match(N, m_Add(m_Trunc(m_Value(AShr)), m_Trunc(m_Value(LShr))))) + return SDValue(); + if (AShr.getOpcode() != AArch64ISD::VASHR) + std::swap(AShr, LShr); + if (AShr.getOpcode() != AArch64ISD::VASHR || + LShr.getOpcode() != AArch64ISD::VLSHR || + AShr.getOperand(0) != LShr.getOperand(0) || + AShr.getConstantOperandVal(1) < VT.getScalarSizeInBits() || + LShr.getConstantOperandVal(1) != VT.getScalarSizeInBits() * 2 - 1) + return SDValue(); + + SDLoc DL(N); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AShr); + SDValue Shift = DAG.getNode( + AArch64ISD::VLSHR, DL, VT, Trunc, + DAG.getTargetConstant(VT.getScalarSizeInBits() - 1, DL, MVT::i32)); + return DAG.getNode(ISD::ADD, DL, VT, Trunc, Shift); +} + static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Try to change sum of two reductions. @@ -22609,6 +22641,8 @@ static SDValue performAddSubCombine(SDNode *N, return Val; if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG)) return Val; + if (SDValue Val = performAddTruncShiftCombine(N, DCI.DAG)) + return Val; if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG)) return Val; diff --git a/llvm/lib/Target/AMDGPU/R600.td b/llvm/lib/Target/AMDGPU/R600.td index 9148edb92b084..bdfaac9f42ea7 100644 --- a/llvm/lib/Target/AMDGPU/R600.td +++ b/llvm/lib/Target/AMDGPU/R600.td @@ -8,15 +8,6 @@ include "llvm/Target/Target.td" -def R600InstrInfo : InstrInfo { - let guessInstructionProperties = 1; -} - -def R600 : Target { - let InstructionSet = R600InstrInfo; - let AllowRegisterRenaming = 1; -} - let Namespace = "R600" in { foreach Index = 0-15 in { @@ -27,6 +18,18 @@ include "R600RegisterInfo.td" } +defm : RemapAllTargetPseudoPointerOperands; + +def R600InstrInfo : InstrInfo { + let guessInstructionProperties = 1; +} + +def R600 : Target { + let InstructionSet = R600InstrInfo; + let AllowRegisterRenaming = 1; +} + + def NullALU : InstrItinClass; def ALU_NULL : FuncUnit; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 2e0dc2fb9d2bd..49005fe95092f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -4774,3 +4774,14 @@ def V_ILLEGAL : Enc32, InstSI<(outs), (ins), "v_illegal"> { let hasSideEffects = 1; let SubtargetPredicate = isGFX10Plus; } + +defvar VGPR32_Ptr_Opcodes = [LOAD_STACK_GUARD]; +defvar VGPR64_Ptr_Opcodes = !listremove(PseudosWithPtrOps, VGPR32_Ptr_Opcodes); + +foreach inst = VGPR32_Ptr_Opcodes in { + def : RemapPointerOperands; +} + +foreach inst = VGPR64_Ptr_Opcodes in { + def : RemapPointerOperands; +} diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 570aae9b3c7a7..1f71d810983db 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -38,6 +38,14 @@ include "ARMSchedule.td" //===----------------------------------------------------------------------===// include "ARMInstrInfo.td" + +def Thumb1OnlyMode : HwMode<[IsThumb1Only]>; +def arm_ptr_rc : RegClassByHwMode< + [DefaultMode, Thumb1OnlyMode], + [GPR, tGPR]>; + +defm : RemapAllTargetPseudoPointerOperands; + def ARMInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AVR/AVR.td b/llvm/lib/Target/AVR/AVR.td index 22ffc4a368ad6..f4ee11984cb73 100644 --- a/llvm/lib/Target/AVR/AVR.td +++ b/llvm/lib/Target/AVR/AVR.td @@ -32,6 +32,8 @@ include "AVRRegisterInfo.td" include "AVRInstrInfo.td" +defm : RemapAllTargetPseudoPointerOperands; + def AVRInstrInfo : InstrInfo; //===---------------------------------------------------------------------===// diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td index 436b7eef600e7..50f9793fb29a7 100644 --- a/llvm/lib/Target/BPF/BPF.td +++ b/llvm/lib/Target/BPF/BPF.td @@ -13,6 +13,9 @@ include "BPFCallingConv.td" include "BPFInstrInfo.td" include "GISel/BPFRegisterBanks.td" + +defm : RemapAllTargetPseudoPointerOperands; + def BPFInstrInfo : InstrInfo; class Proc Features> diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td index b5df93a9d464c..45ef9441b0a41 100644 --- a/llvm/lib/Target/CSKY/CSKY.td +++ b/llvm/lib/Target/CSKY/CSKY.td @@ -671,6 +671,8 @@ def : CK860V<"ck860fv", NoSchedModel, // Define the CSKY target. //===----------------------------------------------------------------------===// +defm : RemapAllTargetPseudoPointerOperands; + def CSKYInstrInfo : InstrInfo; diff --git a/llvm/lib/Target/DirectX/DirectX.td b/llvm/lib/Target/DirectX/DirectX.td index 4d1d45b84a683..1717d533d90fa 100644 --- a/llvm/lib/Target/DirectX/DirectX.td +++ b/llvm/lib/Target/DirectX/DirectX.td @@ -22,6 +22,8 @@ include "DXILStubs.td" // DirectX Subtarget features. //===----------------------------------------------------------------------===// +defm : RemapAllTargetPseudoPointerOperands; + def DirectXInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index ede8463ff644b..17c72c393b432 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -413,6 +413,8 @@ include "HexagonPatternsV65.td" include "HexagonDepMappings.td" include "HexagonIntrinsics.td" +defm : RemapAllTargetPseudoPointerOperands; + def HexagonInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Lanai/Lanai.td b/llvm/lib/Target/Lanai/Lanai.td index c6d949f42047e..9a5422db5feeb 100644 --- a/llvm/lib/Target/Lanai/Lanai.td +++ b/llvm/lib/Target/Lanai/Lanai.td @@ -21,6 +21,8 @@ include "LanaiRegisterInfo.td" include "LanaiCallingConv.td" include "LanaiInstrInfo.td" +defm : RemapAllTargetPseudoPointerOperands; + def LanaiInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index 6497ff999f6fa..67f07f0a0370e 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -202,6 +202,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit, // Define the LoongArch target. //===----------------------------------------------------------------------===// +defm : RemapAllTargetPseudoPointerOperands; + def LoongArchInstrInfo : InstrInfo { let guessInstructionProperties = 0; } diff --git a/llvm/lib/Target/M68k/M68k.td b/llvm/lib/Target/M68k/M68k.td index dab66d1022955..dfa44a423ae25 100644 --- a/llvm/lib/Target/M68k/M68k.td +++ b/llvm/lib/Target/M68k/M68k.td @@ -95,6 +95,8 @@ include "GISel/M68kRegisterBanks.td" include "M68kInstrInfo.td" +defm : RemapAllTargetPseudoPointerOperands; + def M68kInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/MSP430/MSP430.td b/llvm/lib/Target/MSP430/MSP430.td index 38aa30fcf4dd1..cb3949838f6f2 100644 --- a/llvm/lib/Target/MSP430/MSP430.td +++ b/llvm/lib/Target/MSP430/MSP430.td @@ -61,6 +61,8 @@ include "MSP430CallingConv.td" include "MSP430InstrInfo.td" +defm : RemapAllTargetPseudoPointerOperands; + def MSP430InstrInfo : InstrInfo; //===---------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Mips/Mips.td b/llvm/lib/Target/Mips/Mips.td index e18388c179108..6c8d177093c76 100644 --- a/llvm/lib/Target/Mips/Mips.td +++ b/llvm/lib/Target/Mips/Mips.td @@ -244,6 +244,8 @@ include "MipsScheduleI6400.td" include "MipsScheduleP5600.td" include "MipsScheduleGeneric.td" +defm : RemapAllTargetPseudoPointerOperands; + def MipsInstrInfo : InstrInfo { } diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index 31c117a8c0fee..d41a43de95098 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -150,6 +150,16 @@ def : Proc<"sm_121", [SM121, PTX88]>; def : Proc<"sm_121a", [SM121a, PTX88]>; def : Proc<"sm_121f", [SM121f, PTX88]>; + +def Is64Bit : Predicate<"Subtarget->getTargetTriple().getArch() == Triple::nvptx64">; +def NVPTX64 : HwMode<[Is64Bit]>; + +def nvptx_ptr_rc : RegClassByHwMode< + [DefaultMode, NVPTX64], + [B32, B64]>; + +defm : RemapAllTargetPseudoPointerOperands; + def NVPTXInstrInfo : InstrInfo { } diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 5d9ec4adf45c7..dc00aebe311f9 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -820,6 +820,8 @@ def PPCAsmParserVariant : AsmParserVariant { string BreakCharacters = "."; } +defm : RemapAllTargetPseudoPointerOperands; + def PPC : Target { // Information about the instructions. let InstructionSet = PPCInstrInfo; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 65d0484805b95..d6b13680a057e 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -904,6 +904,10 @@ def PPCRegGxRCNoR0Operand : AsmOperandClass { let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber"; } +def ppc_ptr_rc : RegClassByHwMode< + [PPC32, PPC64], + [GPRC, G8RC]>; + def ptr_rc_nor0_by_hwmode : RegClassByHwMode< [PPC32, PPC64], [GPRC_NOR0, G8RC_NOX0]>; diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index b24d8637cb27f..f6f82fd9bb55f 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -96,6 +96,8 @@ def RISCVAsmWriter : AsmWriter { int PassSubtarget = 1; } +defm : RemapAllTargetPseudoPointerOperands; + def RISCV : Target { let InstructionSet = RISCVInstrInfo; let AssemblyParsers = [RISCVAsmParser]; diff --git a/llvm/lib/Target/SPIRV/SPIRV.td b/llvm/lib/Target/SPIRV/SPIRV.td index 39a4131c7f1bd..cc9c7913af427 100644 --- a/llvm/lib/Target/SPIRV/SPIRV.td +++ b/llvm/lib/Target/SPIRV/SPIRV.td @@ -14,6 +14,8 @@ include "SPIRVInstrInfo.td" include "SPIRVCombine.td" include "SPIRVBuiltins.td" +defm : RemapAllTargetPseudoPointerOperands; + def SPIRVInstrInfo : InstrInfo; class Proc Features> diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index b2cbdb2ad7375..709f49b0fecc1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -3373,6 +3373,8 @@ SPIRVType *lowerBuiltinType(const Type *OpaqueType, TargetType = getInlineSpirvType(BuiltinType, MIRBuilder, GR); } else if (Name == "spirv.VulkanBuffer") { TargetType = getVulkanBufferType(BuiltinType, MIRBuilder, GR); + } else if (Name == "spirv.Padding") { + TargetType = GR->getOrCreatePaddingType(MIRBuilder); } else if (Name == "spirv.Layout") { TargetType = getLayoutType(BuiltinType, MIRBuilder, GR); } else { diff --git a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp index 329774df554f4..227d8716d974a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp @@ -79,15 +79,20 @@ static bool replaceCBufferAccesses(Module &M) { // The handle definition should dominate all uses of the cbuffer members. // We'll insert our getpointer calls right after it. IRBuilder<> Builder(HandleDef->getNextNode()); + auto *HandleTy = cast(Mapping.Handle->getValueType()); + auto *LayoutTy = cast(HandleTy->getTypeParameter(0)); + const StructLayout *SL = M.getDataLayout().getStructLayout(LayoutTy); - for (uint32_t Index = 0; Index < Mapping.Members.size(); ++Index) { - GlobalVariable *MemberGV = Mapping.Members[Index].GV; + for (const hlsl::CBufferMember &Member : Mapping.Members) { + GlobalVariable *MemberGV = Member.GV; if (MemberGV->use_empty()) { continue; } + uint32_t IndexInStruct = SL->getElementContainingOffset(Member.Offset); + // Create the getpointer intrinsic call. - Value *IndexVal = Builder.getInt32(Index); + Value *IndexVal = Builder.getInt32(IndexInStruct); Type *PtrType = MemberGV->getType(); Value *GetPointerCall = Builder.CreateIntrinsic( PtrType, Intrinsic::spv_resource_getpointer, {HandleDef, IndexVal}); diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 03bfeacc2071f..eea49bfdaf04b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -841,6 +841,7 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper( uint32_t Index = cast(II->getOperand(1))->getZExtValue(); Ty = cast(Ty)->getElementType(Index); } + Ty = reconstitutePeeledArrayType(Ty); } else { llvm_unreachable("Unknown handle type for spv_resource_getpointer."); } diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index bd0c7d15afd12..0b89e5f4cf316 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsSPIRV.h" @@ -224,14 +225,43 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeVoid(MachineIRBuilder &MIRBuilder) { } void SPIRVGlobalRegistry::invalidateMachineInstr(MachineInstr *MI) { - // TODO: - // - review other data structure wrt. possible issues related to removal - // of a machine instruction during instruction selection. + // Other maps that may hold MachineInstr*: + // - VRegToTypeMap: We cannot remove the definitions of `MI` from + // VRegToTypeMap because some calls to invalidateMachineInstr are replacing MI + // with another instruction defining the same register. We expect that if MI + // is a type instruction, and it is still referenced in VRegToTypeMap, then + // those registers are dead or the VRegToTypeMap is out-of-date. We do not + // expect passes to ask for the SPIR-V type of a dead register. If the + // VRegToTypeMap is out-of-date already, then there was an error before. We + // cannot add an assert to verify this because the VRegToTypeMap can be + // out-of-date. + // - FunctionToInstr & FunctionToInstrRev: At this point, we should not be + // deleting functions. No need to update. + // - AliasInstMDMap: Would require a linear search, and the Intel Alias + // instruction are not instructions instruction selection will be able to + // remove. + + const SPIRVSubtarget &ST = MI->getMF()->getSubtarget(); + const SPIRVInstrInfo *TII = ST.getInstrInfo(); + assert(!TII->isAliasingInstr(*MI) && + "Cannot invalidate aliasing instructions."); + assert(MI->getOpcode() != SPIRV::OpFunction && + "Cannot invalidate OpFunction."); + + if (MI->getOpcode() == SPIRV::OpFunctionCall) { + if (const auto *F = dyn_cast(MI->getOperand(2).getGlobal())) { + auto It = ForwardCalls.find(F); + if (It != ForwardCalls.end()) { + It->second.erase(MI); + if (It->second.empty()) + ForwardCalls.erase(It); + } + } + } + const MachineFunction *MF = MI->getMF(); auto It = LastInsertedTypeMap.find(MF); - if (It == LastInsertedTypeMap.end()) - return; - if (It->second == MI) + if (It != LastInsertedTypeMap.end() && It->second == MI) LastInsertedTypeMap.erase(MF); // remove from the duplicate tracker to avoid incorrect reuse erase(MI); @@ -314,7 +344,7 @@ Register SPIRVGlobalRegistry::createConstFP(const ConstantFP *CF, LLT LLTy = LLT::scalar(BitWidth); Register Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); CurMF->getRegInfo().setRegClass(Res, &SPIRV::fIDRegClass); - assignFloatTypeToVReg(BitWidth, Res, I, TII); + assignSPIRVTypeToVReg(SpvType, Res, *CurMF); MachineInstr *DepMI = const_cast(SpvType); MachineIRBuilder MIRBuilder(*DepMI->getParent(), DepMI->getIterator()); @@ -890,6 +920,17 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct( const StructType *Ty, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier::AccessQualifier AccQual, StructOffsetDecorator Decorator, bool EmitIR) { + Type *OriginalElementType = nullptr; + uint64_t TotalSize = 0; + if (matchPeeledArrayPattern(Ty, OriginalElementType, TotalSize)) { + SPIRVType *ElementSPIRVType = findSPIRVType( + OriginalElementType, MIRBuilder, AccQual, + /* ExplicitLayoutRequired= */ Decorator != nullptr, EmitIR); + return getOpTypeArray(TotalSize, ElementSPIRVType, MIRBuilder, + /*ExplicitLayoutRequired=*/Decorator != nullptr, + EmitIR); + } + const SPIRVSubtarget &ST = cast(MIRBuilder.getMF().getSubtarget()); SmallVector FieldTypes; @@ -1414,6 +1455,18 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateVulkanBufferType( return R; } +SPIRVType * +SPIRVGlobalRegistry::getOrCreatePaddingType(MachineIRBuilder &MIRBuilder) { + auto Key = SPIRV::irhandle_padding(); + if (const MachineInstr *MI = findMI(Key, &MIRBuilder.getMF())) + return MI; + auto *T = Type::getInt8Ty(MIRBuilder.getContext()); + SPIRVType *R = getOrCreateSPIRVIntegerType(8, MIRBuilder); + finishCreatingSPIRVType(T, R); + add(Key, R); + return R; +} + SPIRVType *SPIRVGlobalRegistry::getOrCreateLayoutType( MachineIRBuilder &MIRBuilder, const TargetExtType *T, bool EmitIr) { auto Key = SPIRV::handle(T); diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index 09c77f0cfd4f5..e5a1a2aa8d70f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -611,6 +611,8 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping { SPIRV::StorageClass::StorageClass SC, bool IsWritable, bool EmitIr = false); + SPIRVType *getOrCreatePaddingType(MachineIRBuilder &MIRBuilder); + SPIRVType *getOrCreateLayoutType(MachineIRBuilder &MIRBuilder, const TargetExtType *T, bool EmitIr = false); diff --git a/llvm/lib/Target/SPIRV/SPIRVIRMapping.h b/llvm/lib/Target/SPIRV/SPIRVIRMapping.h index c99d603d340ea..47c7676d5631c 100644 --- a/llvm/lib/Target/SPIRV/SPIRVIRMapping.h +++ b/llvm/lib/Target/SPIRV/SPIRVIRMapping.h @@ -64,6 +64,7 @@ enum SpecialTypeKind { STK_Value, STK_MachineInstr, STK_VkBuffer, + STK_Padding, STK_ExplictLayoutType, STK_Last = -1 }; @@ -149,6 +150,10 @@ inline IRHandle irhandle_vkbuffer(const Type *ElementType, SpecialTypeKind::STK_VkBuffer); } +inline IRHandle irhandle_padding() { + return std::make_tuple(nullptr, 0, SpecialTypeKind::STK_Padding); +} + inline IRHandle irhandle_explict_layout_type(const Type *Ty) { const Type *WrpTy = unifyPtrType(Ty); return irhandle_ptr(WrpTy, Ty->getTypeID(), STK_ExplictLayoutType); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 69606c10fb224..2c27289e759eb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -94,6 +94,8 @@ class SPIRVInstructionSelector : public InstructionSelector { private: void resetVRegsType(MachineFunction &MF); + void removeDeadInstruction(MachineInstr &MI) const; + void removeOpNamesForDeadMI(MachineInstr &MI) const; // tblgen-erated 'select' implementation, used as the initial selector for // the patterns that don't require complex C++. @@ -149,6 +151,9 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectStackRestore(MachineInstr &I) const; bool selectMemOperation(Register ResVReg, MachineInstr &I) const; + Register getOrCreateMemSetGlobal(MachineInstr &I) const; + bool selectCopyMemory(MachineInstr &I, Register SrcReg) const; + bool selectCopyMemorySized(MachineInstr &I, Register SrcReg) const; bool selectAtomicRMW(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, unsigned NewOpcode, @@ -510,22 +515,202 @@ static bool isConstReg(MachineRegisterInfo *MRI, Register OpReg) { return false; } +// TODO(168736): We should make this either a flag in tabelgen +// or reduce our dependence on the global registry, so we can remove this +// function. It can easily be missed when new intrinsics are added. + +// Most SPIR-V instrinsics are considered to have side-effects in their tablegen +// definition because they are referenced in the global registry. This is a list +// of intrinsics that have no side effects other than their references in the +// global registry. +static bool intrinsicHasSideEffects(Intrinsic::ID ID) { + switch (ID) { + // This is not an exhaustive list and may need to be updated. + case Intrinsic::spv_all: + case Intrinsic::spv_alloca: + case Intrinsic::spv_any: + case Intrinsic::spv_bitcast: + case Intrinsic::spv_const_composite: + case Intrinsic::spv_cross: + case Intrinsic::spv_degrees: + case Intrinsic::spv_distance: + case Intrinsic::spv_extractelt: + case Intrinsic::spv_extractv: + case Intrinsic::spv_faceforward: + case Intrinsic::spv_fdot: + case Intrinsic::spv_firstbitlow: + case Intrinsic::spv_firstbitshigh: + case Intrinsic::spv_firstbituhigh: + case Intrinsic::spv_frac: + case Intrinsic::spv_gep: + case Intrinsic::spv_global_offset: + case Intrinsic::spv_global_size: + case Intrinsic::spv_group_id: + case Intrinsic::spv_insertelt: + case Intrinsic::spv_insertv: + case Intrinsic::spv_isinf: + case Intrinsic::spv_isnan: + case Intrinsic::spv_lerp: + case Intrinsic::spv_length: + case Intrinsic::spv_normalize: + case Intrinsic::spv_num_subgroups: + case Intrinsic::spv_num_workgroups: + case Intrinsic::spv_ptrcast: + case Intrinsic::spv_radians: + case Intrinsic::spv_reflect: + case Intrinsic::spv_refract: + case Intrinsic::spv_resource_getpointer: + case Intrinsic::spv_resource_handlefrombinding: + case Intrinsic::spv_resource_handlefromimplicitbinding: + case Intrinsic::spv_resource_nonuniformindex: + case Intrinsic::spv_rsqrt: + case Intrinsic::spv_saturate: + case Intrinsic::spv_sdot: + case Intrinsic::spv_sign: + case Intrinsic::spv_smoothstep: + case Intrinsic::spv_step: + case Intrinsic::spv_subgroup_id: + case Intrinsic::spv_subgroup_local_invocation_id: + case Intrinsic::spv_subgroup_max_size: + case Intrinsic::spv_subgroup_size: + case Intrinsic::spv_thread_id: + case Intrinsic::spv_thread_id_in_group: + case Intrinsic::spv_udot: + case Intrinsic::spv_undef: + case Intrinsic::spv_value_md: + case Intrinsic::spv_workgroup_size: + return false; + default: + return true; + } +} + +// TODO(168736): We should make this either a flag in tabelgen +// or reduce our dependence on the global registry, so we can remove this +// function. It can easily be missed when new intrinsics are added. +static bool isOpcodeWithNoSideEffects(unsigned Opcode) { + switch (Opcode) { + case SPIRV::OpTypeVoid: + case SPIRV::OpTypeBool: + case SPIRV::OpTypeInt: + case SPIRV::OpTypeFloat: + case SPIRV::OpTypeVector: + case SPIRV::OpTypeMatrix: + case SPIRV::OpTypeImage: + case SPIRV::OpTypeSampler: + case SPIRV::OpTypeSampledImage: + case SPIRV::OpTypeArray: + case SPIRV::OpTypeRuntimeArray: + case SPIRV::OpTypeStruct: + case SPIRV::OpTypeOpaque: + case SPIRV::OpTypePointer: + case SPIRV::OpTypeFunction: + case SPIRV::OpTypeEvent: + case SPIRV::OpTypeDeviceEvent: + case SPIRV::OpTypeReserveId: + case SPIRV::OpTypeQueue: + case SPIRV::OpTypePipe: + case SPIRV::OpTypeForwardPointer: + case SPIRV::OpTypePipeStorage: + case SPIRV::OpTypeNamedBarrier: + case SPIRV::OpTypeAccelerationStructureNV: + case SPIRV::OpTypeCooperativeMatrixNV: + case SPIRV::OpTypeCooperativeMatrixKHR: + return true; + default: + return false; + } +} + bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI) { + // If there are no definitions, then assume there is some other + // side-effect that makes this instruction live. + if (MI.getNumDefs() == 0) + return false; + for (const auto &MO : MI.all_defs()) { Register Reg = MO.getReg(); - if (Reg.isPhysical() || !MRI.use_nodbg_empty(Reg)) + if (Reg.isPhysical()) { + LLVM_DEBUG(dbgs() << "Not dead: def of physical register " << Reg); return false; + } + for (const auto &UseMI : MRI.use_nodbg_instructions(Reg)) { + if (UseMI.getOpcode() != SPIRV::OpName) { + LLVM_DEBUG(dbgs() << "Not dead: def " << MO << " has use in " << UseMI); + return false; + } + } } + if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE || MI.isFakeUse() || - MI.isLifetimeMarker()) + MI.isLifetimeMarker()) { + LLVM_DEBUG( + dbgs() + << "Not dead: Opcode is LOCAL_ESCAPE, fake use, or lifetime marker.\n"); return false; - if (MI.isPHI()) + } + if (MI.isPHI()) { + LLVM_DEBUG(dbgs() << "Dead: Phi instruction with no uses.\n"); return true; + } + + // It is possible that the only side effect is that the instruction is + // referenced in the global registry. If that is the only side effect, the + // intrinsic is dead. + if (MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS || + MI.getOpcode() == TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) { + const auto &Intr = cast(MI); + if (!intrinsicHasSideEffects(Intr.getIntrinsicID())) { + LLVM_DEBUG(dbgs() << "Dead: Intrinsic with no real side effects.\n"); + return true; + } + } + if (MI.mayStore() || MI.isCall() || (MI.mayLoad() && MI.hasOrderedMemoryRef()) || MI.isPosition() || - MI.isDebugInstr() || MI.isTerminator() || MI.isJumpTableDebugInfo()) + MI.isDebugInstr() || MI.isTerminator() || MI.isJumpTableDebugInfo()) { + LLVM_DEBUG(dbgs() << "Not dead: instruction has side effects.\n"); return false; - return true; + } + + if (isPreISelGenericOpcode(MI.getOpcode())) { + // TODO: Is there a generic way to check if the opcode has side effects? + LLVM_DEBUG(dbgs() << "Dead: Generic opcode with no uses.\n"); + return true; + } + + if (isOpcodeWithNoSideEffects(MI.getOpcode())) { + LLVM_DEBUG(dbgs() << "Dead: known opcode with no side effects\n"); + return true; + } + + return false; +} + +void SPIRVInstructionSelector::removeOpNamesForDeadMI(MachineInstr &MI) const { + // Delete the OpName that uses the result if there is one. + for (const auto &MO : MI.all_defs()) { + Register Reg = MO.getReg(); + if (Reg.isPhysical()) + continue; + SmallVector UselessOpNames; + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { + assert(UseMI.getOpcode() == SPIRV::OpName && + "There is still a use of the dead function."); + UselessOpNames.push_back(&UseMI); + } + for (MachineInstr *OpNameMI : UselessOpNames) { + GR.invalidateMachineInstr(OpNameMI); + OpNameMI->eraseFromParent(); + } + } +} + +void SPIRVInstructionSelector::removeDeadInstruction(MachineInstr &MI) const { + salvageDebugInfo(*MRI, MI); + GR.invalidateMachineInstr(&MI); + removeOpNamesForDeadMI(MI); + MI.eraseFromParent(); } bool SPIRVInstructionSelector::select(MachineInstr &I) { @@ -534,6 +719,13 @@ bool SPIRVInstructionSelector::select(MachineInstr &I) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); + LLVM_DEBUG(dbgs() << "Checking if instruction is dead: " << I;); + if (isDead(I, *MRI)) { + LLVM_DEBUG(dbgs() << "Instruction is dead.\n"); + removeDeadInstruction(I); + return true; + } + Register Opcode = I.getOpcode(); // If it's not a GMIR instruction, we've selected it already. if (!isPreISelGenericOpcode(Opcode)) { @@ -585,9 +777,7 @@ bool SPIRVInstructionSelector::select(MachineInstr &I) { // if the instruction has been already made dead by folding it away // erase it LLVM_DEBUG(dbgs() << "Instruction is folded and dead.\n"); - salvageDebugInfo(*MRI, I); - GR.invalidateMachineInstr(&I); - I.eraseFromParent(); + removeDeadInstruction(I); return true; } @@ -1436,50 +1626,79 @@ bool SPIRVInstructionSelector::selectStackRestore(MachineInstr &I) const { .constrainAllUses(TII, TRI, RBI); } -bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg, - MachineInstr &I) const { +Register +SPIRVInstructionSelector::getOrCreateMemSetGlobal(MachineInstr &I) const { + MachineIRBuilder MIRBuilder(I); + assert(I.getOperand(1).isReg() && I.getOperand(2).isReg()); + + // TODO: check if we have such GV, add init, use buildGlobalVariable. + unsigned Num = getIConstVal(I.getOperand(2).getReg(), MRI); + Function &CurFunction = GR.CurMF->getFunction(); + Type *LLVMArrTy = + ArrayType::get(IntegerType::get(CurFunction.getContext(), 8), Num); + GlobalVariable *GV = new GlobalVariable(*CurFunction.getParent(), LLVMArrTy, + true, GlobalValue::InternalLinkage, + Constant::getNullValue(LLVMArrTy)); + + Type *ValTy = Type::getInt8Ty(I.getMF()->getFunction().getContext()); + Type *ArrTy = ArrayType::get(ValTy, Num); + SPIRVType *VarTy = GR.getOrCreateSPIRVPointerType( + ArrTy, MIRBuilder, SPIRV::StorageClass::UniformConstant); + + SPIRVType *SpvArrTy = GR.getOrCreateSPIRVType( + ArrTy, MIRBuilder, SPIRV::AccessQualifier::None, false); + + unsigned Val = getIConstVal(I.getOperand(1).getReg(), MRI); + Register Const = GR.getOrCreateConstIntArray(Val, Num, I, SpvArrTy, TII); + + Register VarReg = MRI->createGenericVirtualRegister(LLT::scalar(64)); + auto MIBVar = + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVariable)) + .addDef(VarReg) + .addUse(GR.getSPIRVTypeID(VarTy)) + .addImm(SPIRV::StorageClass::UniformConstant) + .addUse(Const); + if (!MIBVar.constrainAllUses(TII, TRI, RBI)) + return Register(); + + GR.add(GV, MIBVar); + GR.addGlobalObject(GV, GR.CurMF, VarReg); + + buildOpDecorate(VarReg, I, TII, SPIRV::Decoration::Constant, {}); + return VarReg; +} + +bool SPIRVInstructionSelector::selectCopyMemory(MachineInstr &I, + Register SrcReg) const { MachineBasicBlock &BB = *I.getParent(); - Register SrcReg = I.getOperand(1).getReg(); - bool Result = true; - if (I.getOpcode() == TargetOpcode::G_MEMSET) { + Register DstReg = I.getOperand(0).getReg(); + SPIRVType *DstTy = GR.getSPIRVTypeForVReg(DstReg); + SPIRVType *SrcTy = GR.getSPIRVTypeForVReg(SrcReg); + if (GR.getPointeeType(DstTy) != GR.getPointeeType(SrcTy)) + report_fatal_error("OpCopyMemory requires operands to have the same type"); + uint64_t CopySize = getIConstVal(I.getOperand(2).getReg(), MRI); + SPIRVType *PointeeTy = GR.getPointeeType(DstTy); + const Type *LLVMPointeeTy = GR.getTypeForSPIRVType(PointeeTy); + if (!LLVMPointeeTy) + report_fatal_error( + "Unable to determine pointee type size for OpCopyMemory"); + const DataLayout &DL = I.getMF()->getFunction().getDataLayout(); + if (CopySize != DL.getTypeStoreSize(const_cast(LLVMPointeeTy))) + report_fatal_error( + "OpCopyMemory requires the size to match the pointee type size"); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCopyMemory)) + .addUse(DstReg) + .addUse(SrcReg); + if (I.getNumMemOperands()) { MachineIRBuilder MIRBuilder(I); - assert(I.getOperand(1).isReg() && I.getOperand(2).isReg()); - unsigned Val = getIConstVal(I.getOperand(1).getReg(), MRI); - unsigned Num = getIConstVal(I.getOperand(2).getReg(), MRI); - Type *ValTy = Type::getInt8Ty(I.getMF()->getFunction().getContext()); - Type *ArrTy = ArrayType::get(ValTy, Num); - SPIRVType *VarTy = GR.getOrCreateSPIRVPointerType( - ArrTy, MIRBuilder, SPIRV::StorageClass::UniformConstant); - - SPIRVType *SpvArrTy = GR.getOrCreateSPIRVType( - ArrTy, MIRBuilder, SPIRV::AccessQualifier::None, false); - Register Const = GR.getOrCreateConstIntArray(Val, Num, I, SpvArrTy, TII); - // TODO: check if we have such GV, add init, use buildGlobalVariable. - Function &CurFunction = GR.CurMF->getFunction(); - Type *LLVMArrTy = - ArrayType::get(IntegerType::get(CurFunction.getContext(), 8), Num); - // Module takes ownership of the global var. - GlobalVariable *GV = new GlobalVariable(*CurFunction.getParent(), LLVMArrTy, - true, GlobalValue::InternalLinkage, - Constant::getNullValue(LLVMArrTy)); - Register VarReg = MRI->createGenericVirtualRegister(LLT::scalar(64)); - auto MIBVar = - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVariable)) - .addDef(VarReg) - .addUse(GR.getSPIRVTypeID(VarTy)) - .addImm(SPIRV::StorageClass::UniformConstant) - .addUse(Const); - Result &= MIBVar.constrainAllUses(TII, TRI, RBI); - - GR.add(GV, MIBVar); - GR.addGlobalObject(GV, GR.CurMF, VarReg); - - buildOpDecorate(VarReg, I, TII, SPIRV::Decoration::Constant, {}); - SPIRVType *SourceTy = GR.getOrCreateSPIRVPointerType( - ValTy, I, SPIRV::StorageClass::UniformConstant); - SrcReg = MRI->createGenericVirtualRegister(LLT::scalar(64)); - selectOpWithSrcs(SrcReg, SourceTy, I, {VarReg}, SPIRV::OpBitcast); + addMemoryOperands(*I.memoperands_begin(), MIB, MIRBuilder, GR); } + return MIB.constrainAllUses(TII, TRI, RBI); +} + +bool SPIRVInstructionSelector::selectCopyMemorySized(MachineInstr &I, + Register SrcReg) const { + MachineBasicBlock &BB = *I.getParent(); auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCopyMemorySized)) .addUse(I.getOperand(0).getReg()) .addUse(SrcReg) @@ -1488,9 +1707,30 @@ bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg, MachineIRBuilder MIRBuilder(I); addMemoryOperands(*I.memoperands_begin(), MIB, MIRBuilder, GR); } - Result &= MIB.constrainAllUses(TII, TRI, RBI); - if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg()) - Result &= BuildCOPY(ResVReg, MIB->getOperand(0).getReg(), I); + return MIB.constrainAllUses(TII, TRI, RBI); +} + +bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg, + MachineInstr &I) const { + Register SrcReg = I.getOperand(1).getReg(); + bool Result = true; + if (I.getOpcode() == TargetOpcode::G_MEMSET) { + Register VarReg = getOrCreateMemSetGlobal(I); + if (!VarReg.isValid()) + return false; + Type *ValTy = Type::getInt8Ty(I.getMF()->getFunction().getContext()); + SPIRVType *SourceTy = GR.getOrCreateSPIRVPointerType( + ValTy, I, SPIRV::StorageClass::UniformConstant); + SrcReg = MRI->createGenericVirtualRegister(LLT::scalar(64)); + Result &= selectOpWithSrcs(SrcReg, SourceTy, I, {VarReg}, SPIRV::OpBitcast); + } + if (STI.isLogicalSPIRV()) { + Result &= selectCopyMemory(I, SrcReg); + } else { + Result &= selectCopyMemorySized(I, SrcReg); + } + if (ResVReg.isValid() && ResVReg != I.getOperand(0).getReg()) + Result &= BuildCOPY(ResVReg, I.getOperand(0).getReg(), I); return Result; } diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 8f2fc01da476f..7fdb0fafa3719 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -1042,6 +1042,75 @@ getFirstValidInstructionInsertPoint(MachineBasicBlock &BB) { : VarPos; } +bool matchPeeledArrayPattern(const StructType *Ty, Type *&OriginalElementType, + uint64_t &TotalSize) { + // An array of N padded structs is represented as {[N-1 x <{T, pad}>], T}. + if (Ty->getStructNumElements() != 2) + return false; + + Type *FirstElement = Ty->getStructElementType(0); + Type *SecondElement = Ty->getStructElementType(1); + + if (!FirstElement->isArrayTy()) + return false; + + Type *ArrayElementType = FirstElement->getArrayElementType(); + if (!ArrayElementType->isStructTy() || + ArrayElementType->getStructNumElements() != 2) + return false; + + Type *T_in_struct = ArrayElementType->getStructElementType(0); + if (T_in_struct != SecondElement) + return false; + + auto *Padding_in_struct = + dyn_cast(ArrayElementType->getStructElementType(1)); + if (!Padding_in_struct || Padding_in_struct->getName() != "spirv.Padding") + return false; + + const uint64_t ArraySize = FirstElement->getArrayNumElements(); + TotalSize = ArraySize + 1; + OriginalElementType = ArrayElementType; + return true; +} + +Type *reconstitutePeeledArrayType(Type *Ty) { + if (!Ty->isStructTy()) + return Ty; + + auto *STy = cast(Ty); + Type *OriginalElementType = nullptr; + uint64_t TotalSize = 0; + if (matchPeeledArrayPattern(STy, OriginalElementType, TotalSize)) { + Type *ResultTy = ArrayType::get( + reconstitutePeeledArrayType(OriginalElementType), TotalSize); + return ResultTy; + } + + SmallVector NewElementTypes; + bool Changed = false; + for (Type *ElementTy : STy->elements()) { + Type *NewElementTy = reconstitutePeeledArrayType(ElementTy); + if (NewElementTy != ElementTy) + Changed = true; + NewElementTypes.push_back(NewElementTy); + } + + if (!Changed) + return Ty; + + Type *ResultTy; + if (STy->isLiteral()) + ResultTy = + StructType::get(STy->getContext(), NewElementTypes, STy->isPacked()); + else { + auto *NewTy = StructType::create(STy->getContext(), STy->getName()); + NewTy->setBody(NewElementTypes, STy->isPacked()); + ResultTy = NewTy; + } + return ResultTy; +} + std::optional getSpirvLinkageTypeFor(const SPIRVSubtarget &ST, const GlobalValue &GV) { if (GV.hasLocalLinkage() || GV.hasHiddenVisibility()) diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index 99d9d403ea70c..45e211a1e5d2a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -321,6 +321,21 @@ Type *parseBasicTypeName(StringRef &TypeName, LLVMContext &Ctx); // Returns true if the function was changed. bool sortBlocks(Function &F); +// Check for peeled array structs and recursively reconstitute them. In HLSL +// CBuffers, arrays may have padding between the elements, but not after the +// last element. To represent this in LLVM IR an array [N x T] will be +// represented as {[N-1 x {T, spirv.Padding}], T}. The function +// matchPeeledArrayPattern recognizes this pattern retrieving the type {T, +// spirv.Padding}, and the size N. +bool matchPeeledArrayPattern(const StructType *Ty, Type *&OriginalElementType, + uint64_t &TotalSize); + +// This function will turn the type {[N-1 x {T, spirv.Padding}], T} back into +// [N x {T, spirv.Padding}]. So it can be translated into SPIR-V. The offset +// decorations will be such that there will be no padding after the array when +// relevant. +Type *reconstitutePeeledArrayType(Type *Ty); + inline bool hasInitializer(const GlobalVariable *GV) { return GV->hasInitializer() && !isa(GV->getInitializer()); } diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td index 38b0508885069..ecf82fab5cc41 100644 --- a/llvm/lib/Target/Sparc/Sparc.td +++ b/llvm/lib/Target/Sparc/Sparc.td @@ -126,6 +126,8 @@ include "SparcCallingConv.td" include "SparcSchedule.td" include "SparcInstrInfo.td" +defm : RemapAllTargetPseudoPointerOperands; + def SparcInstrInfo : InstrInfo; def SparcAsmParser : AsmParser { diff --git a/llvm/lib/Target/SystemZ/SystemZ.td b/llvm/lib/Target/SystemZ/SystemZ.td index ec110645c62dd..95f039d6328f3 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.td +++ b/llvm/lib/Target/SystemZ/SystemZ.td @@ -57,6 +57,9 @@ include "SystemZInstrHFP.td" include "SystemZInstrDFP.td" include "SystemZInstrSystem.td" + +defm : RemapAllTargetPseudoPointerOperands; + def SystemZInstrInfo : InstrInfo { let guessInstructionProperties = 0; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/VE/VE.td b/llvm/lib/Target/VE/VE.td index bb076bd9f6d41..aedce0f4ebc8f 100644 --- a/llvm/lib/Target/VE/VE.td +++ b/llvm/lib/Target/VE/VE.td @@ -30,6 +30,7 @@ include "VERegisterInfo.td" include "VECallingConv.td" include "VEInstrInfo.td" +defm : RemapAllTargetPseudoPointerOperands; def VEInstrInfo : InstrInfo {} def VEAsmParser : AsmParser { diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index 089be5f1dc70e..67015ffcfc760 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -108,6 +108,14 @@ include "WebAssemblyRegisterInfo.td" include "WebAssemblyInstrInfo.td" +def WASM64 : HwMode<[HasAddr64]>; + +def wasm_ptr_rc : RegClassByHwMode< + [DefaultMode, WASM64], + [I32, I64]>; + +defm : RemapAllTargetPseudoPointerOperands; + def WebAssemblyInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 27ec052cfda40..8f29a64d58194 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -795,6 +795,8 @@ include "X86Schedule.td" include "X86InstrInfo.td" include "X86SchedPredicates.td" +defm : RemapAllTargetPseudoPointerOperands; + def X86InstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/XCore/XCore.td b/llvm/lib/Target/XCore/XCore.td index a97b3dd1d0a2b..fa8b9fe26bbe1 100644 --- a/llvm/lib/Target/XCore/XCore.td +++ b/llvm/lib/Target/XCore/XCore.td @@ -24,6 +24,8 @@ include "XCoreRegisterInfo.td" include "XCoreInstrInfo.td" include "XCoreCallingConv.td" +defm : RemapAllTargetPseudoPointerOperands; + def XCoreInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Xtensa/Xtensa.td b/llvm/lib/Target/Xtensa/Xtensa.td index 4ef885e19101e..eecf42f46f88b 100644 --- a/llvm/lib/Target/Xtensa/Xtensa.td +++ b/llvm/lib/Target/Xtensa/Xtensa.td @@ -44,6 +44,8 @@ include "XtensaCallingConv.td" include "XtensaInstrInfo.td" +defm : RemapAllTargetPseudoPointerOperands; + def XtensaInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a63956c0cba6b..393586e504c17 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8336,6 +8336,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, if (auto Plan = tryToBuildVPlanWithVPRecipes( std::unique_ptr(VPlan0->duplicate()), SubRange, &LVer)) { // Now optimize the initial VPlan. + VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop); VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths, *Plan, CM.getMinimalBitwidths()); VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index e41f67103e096..1c88b56ca89dc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2593,22 +2593,11 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent, } #endif -static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride, - unsigned CurrentPart, IRBuilderBase &Builder) { - // Use i32 for the gep index type when the value is constant, - // or query DataLayout for a more suitable index type otherwise. - const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0)) - ? DL.getIndexType(Builder.getPtrTy(0)) - : Builder.getInt32Ty(); -} - void VPVectorEndPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; unsigned CurrentPart = getUnrollPart(*this); - bool IsUnitStride = Stride == 1 || Stride == -1; - Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true, - IsUnitStride, CurrentPart, Builder); + const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); + Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this)); // The wide store needs to start at the last vector element. Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); @@ -2644,8 +2633,8 @@ void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent, void VPVectorPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; unsigned CurrentPart = getUnrollPart(*this); - Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false, - /*IsUnitStride*/ true, CurrentPart, Builder); + const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); + Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this)); Value *Ptr = State.get(getOperand(0), VPLane(0)); Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9174058baad65..8bf1003e923b1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -139,6 +139,41 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( return true; } +// Check if a load can be hoisted by verifying it doesn't alias with any stores +// in blocks between FirstBB and LastBB using scoped noalias metadata. +static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load, + VPBasicBlock *FirstBB, + VPBasicBlock *LastBB) { + // Get the load's memory location and check if it aliases with any stores + // using scoped noalias metadata. + auto LoadLoc = vputils::getMemoryLocation(*Load); + if (!LoadLoc || !LoadLoc->AATags.Scope) + return false; + + const AAMDNodes &LoadAA = LoadLoc->AATags; + for (VPBlockBase *Block = FirstBB; Block; + Block = Block->getSingleSuccessor()) { + // This function assumes a simple linear chain of blocks. If there are + // multiple successors, we would need more complex analysis. + assert(Block->getNumSuccessors() <= 1 && + "Expected at most one successor in block chain"); + auto *VPBB = cast(Block); + for (VPRecipeBase &R : *VPBB) { + if (R.mayWriteToMemory()) { + auto Loc = vputils::getMemoryLocation(R); + // Bail out if we can't get the location or if the scoped noalias + // metadata indicates potential aliasing. + if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes( + LoadAA.Scope, Loc->AATags.NoAlias)) + return false; + } + } + if (Block == LastBB) + break; + } + return true; +} + /// Return true if we do not know how to (mechanically) hoist or sink \p R out /// of a loop region. static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) { @@ -4010,6 +4045,122 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) { } } +// Returns the intersection of metadata from a group of loads. +static VPIRMetadata getCommonLoadMetadata(ArrayRef Loads) { + VPIRMetadata CommonMetadata = *Loads.front(); + for (VPReplicateRecipe *Load : drop_begin(Loads)) + CommonMetadata.intersect(*Load); + return CommonMetadata; +} + +void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, + const Loop *L) { + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPTypeAnalysis TypeInfo(Plan); + VPDominatorTree VPDT(Plan); + + // Group predicated loads by their address SCEV. + DenseMap> LoadsByAddress; + for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) { + auto *VPBB = cast(Block); + for (VPRecipeBase &R : *VPBB) { + auto *RepR = dyn_cast(&R); + if (!RepR || RepR->getOpcode() != Instruction::Load || + !RepR->isPredicated()) + continue; + + VPValue *Addr = RepR->getOperand(0); + const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L); + if (!isa(AddrSCEV)) + LoadsByAddress[AddrSCEV].push_back(RepR); + } + } + + // For each address, collect loads with complementary masks, sort by + // dominance, and use the earliest load. + for (auto &[Addr, Loads] : LoadsByAddress) { + if (Loads.size() < 2) + continue; + + // Collect groups of loads with complementary masks. + SmallVector> LoadGroups; + for (VPReplicateRecipe *&LoadI : Loads) { + if (!LoadI) + continue; + + VPValue *MaskI = LoadI->getMask(); + Type *TypeI = TypeInfo.inferScalarType(LoadI); + SmallVector Group; + Group.push_back(LoadI); + LoadI = nullptr; + + // Find all loads with the same type. + for (VPReplicateRecipe *&LoadJ : Loads) { + if (!LoadJ) + continue; + + Type *TypeJ = TypeInfo.inferScalarType(LoadJ); + if (TypeI == TypeJ) { + Group.push_back(LoadJ); + LoadJ = nullptr; + } + } + + // Check if any load in the group has a complementary mask with another, + // that is M1 == NOT(M2) or M2 == NOT(M1). + bool HasComplementaryMask = + any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) { + VPValue *MaskJ = Load->getMask(); + return match(MaskI, m_Not(m_Specific(MaskJ))) || + match(MaskJ, m_Not(m_Specific(MaskI))); + }); + + if (HasComplementaryMask) + LoadGroups.push_back(std::move(Group)); + } + + // For each group, check memory dependencies and hoist the earliest load. + for (auto &Group : LoadGroups) { + // Sort loads by dominance order, with earliest (most dominating) first. + sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { + return VPDT.properlyDominates(A, B); + }); + + VPReplicateRecipe *EarliestLoad = Group.front(); + VPBasicBlock *FirstBB = EarliestLoad->getParent(); + VPBasicBlock *LastBB = Group.back()->getParent(); + + // Check that the load doesn't alias with stores between first and last. + if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB)) + continue; + + // Find the load with minimum alignment to use. + auto *LoadWithMinAlign = + *min_element(Group, [](VPReplicateRecipe *A, VPReplicateRecipe *B) { + return cast(A->getUnderlyingInstr())->getAlign() < + cast(B->getUnderlyingInstr())->getAlign(); + }); + + // Collect common metadata from all loads in the group. + VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group); + + // Create an unpredicated load with minimum alignment using the earliest + // dominating address and common metadata. + auto *UnpredicatedLoad = new VPReplicateRecipe( + LoadWithMinAlign->getUnderlyingInstr(), EarliestLoad->getOperand(0), + /*IsSingleScalar=*/false, /*Mask=*/nullptr, /*Flags=*/{}, + CommonMetadata); + UnpredicatedLoad->insertBefore(EarliestLoad); + + // Replace all loads in the group with the unpredicated load. + for (VPReplicateRecipe *Load : Group) { + Load->replaceAllUsesWith(UnpredicatedLoad); + Load->eraseFromParent(); + } + } + } +} + void VPlanTransforms::materializeConstantVectorTripCount( VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 5fd3f756c55e3..6245a5107a5d0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -314,6 +314,12 @@ struct VPlanTransforms { /// plan using noalias metadata. static void hoistInvariantLoads(VPlan &Plan); + /// Hoist predicated loads from the same address to the loop entry block, if + /// they are guaranteed to execute on both paths (i.e., in replicate regions + /// with complementary masks P and NOT P). + static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, + const Loop *L); + // Materialize vector trip counts for constants early if it can simply be // computed as (Original TC / VF * UF) * VF * UF. static void diff --git a/llvm/test/CodeGen/AArch64/addtruncshift.ll b/llvm/test/CodeGen/AArch64/addtruncshift.ll index f3af50ec8cf3e..6dbe0b3d80b9a 100644 --- a/llvm/test/CodeGen/AArch64/addtruncshift.ll +++ b/llvm/test/CodeGen/AArch64/addtruncshift.ll @@ -3,14 +3,21 @@ ; RUN: llc -mtriple=aarch64-none-elf -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <2 x i32> @test_v2i64(<2 x i64> %n) { -; CHECK-LABEL: test_v2i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushr v1.2d, v0.2d, #63 -; CHECK-NEXT: sshr v0.2d, v0.2d, #35 -; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: add v0.2s, v1.2s, v0.2s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_v2i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #35 +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: usra v0.2s, v0.2s, #31 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_v2i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushr v1.2d, v0.2d, #63 +; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #35 +; CHECK-GI-NEXT: xtn v1.2s, v1.2d +; CHECK-GI-NEXT: xtn v0.2s, v0.2d +; CHECK-GI-NEXT: add v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ret entry: %shr = lshr <2 x i64> %n, splat (i64 63) %vmovn.i4 = trunc nuw nsw <2 x i64> %shr to <2 x i32> @@ -21,14 +28,21 @@ entry: } define <4 x i16> @test_v4i32(<4 x i32> %n) { -; CHECK-LABEL: test_v4i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushr v1.4s, v0.4s, #31 -; CHECK-NEXT: sshr v0.4s, v0.4s, #17 -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: add v0.4h, v1.4h, v0.4h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_v4i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #17 +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: usra v0.4h, v0.4h, #15 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_v4i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushr v1.4s, v0.4s, #31 +; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #17 +; CHECK-GI-NEXT: xtn v1.4h, v1.4s +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ret entry: %shr = lshr <4 x i32> %n, splat (i32 31) %vmovn.i4 = trunc nuw nsw <4 x i32> %shr to <4 x i16> @@ -39,14 +53,21 @@ entry: } define <8 x i8> @test_v8i16(<8 x i16> %n) { -; CHECK-LABEL: test_v8i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushr v1.8h, v0.8h, #15 -; CHECK-NEXT: sshr v0.8h, v0.8h, #9 -; CHECK-NEXT: xtn v1.8b, v1.8h -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: add v0.8b, v1.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_v8i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #9 +; CHECK-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-NEXT: usra v0.8b, v0.8b, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_v8i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushr v1.8h, v0.8h, #15 +; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #9 +; CHECK-GI-NEXT: xtn v1.8b, v1.8h +; CHECK-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-GI-NEXT: add v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret entry: %shr = lshr <8 x i16> %n, splat (i16 15) %vmovn.i4 = trunc nuw nsw <8 x i16> %shr to <8 x i8> @@ -91,6 +112,3 @@ entry: ret <2 x i32> %add } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-GI: {{.*}} -; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll index 1e94be0886307..a43a4d66d04bb 100644 --- a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll +++ b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll @@ -13,7 +13,9 @@ define void @main() { entry: %0 = alloca <2 x i32>, align 4 + store <2 x i32> zeroinitializer, ptr %0, align 4 %1 = getelementptr <2 x i32>, ptr %0, i32 0, i32 0 %2 = alloca float, align 4 + store float 0.0, ptr %2, align 4 ret void } diff --git a/llvm/test/CodeGen/SPIRV/SpecConstants/restore-spec-type.ll b/llvm/test/CodeGen/SPIRV/SpecConstants/restore-spec-type.ll index 9e91854de1172..b0bad1819a25d 100644 --- a/llvm/test/CodeGen/SPIRV/SpecConstants/restore-spec-type.ll +++ b/llvm/test/CodeGen/SPIRV/SpecConstants/restore-spec-type.ll @@ -29,9 +29,12 @@ %Struct7 = type [2 x %Struct] %Nested = type { %Struct7 } +@G = global %Struct zeroinitializer + define spir_kernel void @foo(ptr addrspace(4) %arg1, ptr addrspace(4) %arg2) { entry: %var = alloca %Struct + store %Struct zeroinitializer, ptr %var %r1 = call %Struct @_Z29__spirv_SpecConstantComposite_1(float 1.0) store %Struct %r1, ptr addrspace(4) %arg1 %r2 = call %Struct7 @_Z29__spirv_SpecConstantComposite_2(%Struct %r1, %Struct %r1) diff --git a/llvm/test/CodeGen/SPIRV/basic_float_types.ll b/llvm/test/CodeGen/SPIRV/basic_float_types.ll index a0ba97e1d1f14..6cdc67bbf24ee 100644 --- a/llvm/test/CodeGen/SPIRV/basic_float_types.ll +++ b/llvm/test/CodeGen/SPIRV/basic_float_types.ll @@ -2,6 +2,9 @@ ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16 %s -o - -filetype=obj | spirv-val %} +// TODO: Open bug bfloat16 cannot be stored to. +XFAIL: * + define void @main() { entry: @@ -49,50 +52,66 @@ entry: ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_half]] Function %half_Val = alloca half, align 2 + store half 0.0, ptr %half_Val, align 2 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_bfloat]] Function %bfloat_Val = alloca bfloat, align 2 + store bfloat 0.0, ptr %bfloat_Val, align 2 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_float]] Function %float_Val = alloca float, align 4 + store float 0.0, ptr %float_Val, align 4 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_double]] Function %double_Val = alloca double, align 8 + store double 0.0, ptr %double_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v2half]] Function %half2_Val = alloca <2 x half>, align 4 + store <2 x half> zeroinitializer, ptr %half2_Val, align 4 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v3half]] Function %half3_Val = alloca <3 x half>, align 8 + store <3 x half> zeroinitializer, ptr %half3_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v4half]] Function %half4_Val = alloca <4 x half>, align 8 + store <4 x half> zeroinitializer, ptr %half4_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v2bfloat]] Function %bfloat2_Val = alloca <2 x bfloat>, align 4 + store <2 x bfloat> zeroinitializer, ptr %bfloat2_Val, align 4 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v3bfloat]] Function %bfloat3_Val = alloca <3 x bfloat>, align 8 + store <3 x bfloat> zeroinitializer, ptr %bfloat3_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v4bfloat]] Function %bfloat4_Val = alloca <4 x bfloat>, align 8 + store <4 x bfloat> zeroinitializer, ptr %bfloat4_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v2float]] Function %float2_Val = alloca <2 x float>, align 8 + store <2 x float> zeroinitializer, ptr %float2_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v3float]] Function %float3_Val = alloca <3 x float>, align 16 + store <3 x float> zeroinitializer, ptr %float3_Val, align 16 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v4float]] Function %float4_Val = alloca <4 x float>, align 16 + store <4 x float> zeroinitializer, ptr %float4_Val, align 16 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v2double]] Function %double2_Val = alloca <2 x double>, align 16 + store <2 x double> zeroinitializer, ptr %double2_Val, align 16 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v3double]] Function %double3_Val = alloca <3 x double>, align 32 + store <3 x double> zeroinitializer, ptr %double3_Val, align 32 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v4double]] Function %double4_Val = alloca <4 x double>, align 32 + store <4 x double> zeroinitializer, ptr %double4_Val, align 32 ret void } diff --git a/llvm/test/CodeGen/SPIRV/basic_int_types.ll b/llvm/test/CodeGen/SPIRV/basic_int_types.ll index 5aa7aaf6fbd01..1ed241eed4019 100644 --- a/llvm/test/CodeGen/SPIRV/basic_int_types.ll +++ b/llvm/test/CodeGen/SPIRV/basic_int_types.ll @@ -37,39 +37,51 @@ entry: ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_short]] Function %int16_t_Val = alloca i16, align 2 + store i16 0, ptr %int16_t_Val, align 2 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_int]] Function %int_Val = alloca i32, align 4 + store i32 0, ptr %int_Val, align 4 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_long]] Function %int64_t_Val = alloca i64, align 8 + store i64 0, ptr %int64_t_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v2short]] Function %int16_t2_Val = alloca <2 x i16>, align 4 + store <2 x i16> zeroinitializer, ptr %int16_t2_Val, align 4 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v3short]] Function %int16_t3_Val = alloca <3 x i16>, align 8 + store <3 x i16> zeroinitializer, ptr %int16_t3_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v4short]] Function %int16_t4_Val = alloca <4 x i16>, align 8 + store <4 x i16> zeroinitializer, ptr %int16_t4_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v2int]] Function %int2_Val = alloca <2 x i32>, align 8 + store <2 x i32> zeroinitializer, ptr %int2_Val, align 8 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v3int]] Function %int3_Val = alloca <3 x i32>, align 16 + store <3 x i32> zeroinitializer, ptr %int3_Val, align 16 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v4int]] Function %int4_Val = alloca <4 x i32>, align 16 + store <4 x i32> zeroinitializer, ptr %int4_Val, align 16 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v2long]] Function %int64_t2_Val = alloca <2 x i64>, align 16 + store <2 x i64> zeroinitializer, ptr %int64_t2_Val, align 16 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v3long]] Function %int64_t3_Val = alloca <3 x i64>, align 32 + store <3 x i64> zeroinitializer, ptr %int64_t3_Val, align 32 ; CHECK: %[[#]] = OpVariable %[[#ptr_Function_v4long]] Function %int64_t4_Val = alloca <4 x i64>, align 32 + store <4 x i64> zeroinitializer, ptr %int64_t4_Val, align 32 ret void } diff --git a/llvm/test/CodeGen/SPIRV/basic_int_types_spirvdis.ll b/llvm/test/CodeGen/SPIRV/basic_int_types_spirvdis.ll index 56b5f48715533..f3c8f9967211a 100644 --- a/llvm/test/CodeGen/SPIRV/basic_int_types_spirvdis.ll +++ b/llvm/test/CodeGen/SPIRV/basic_int_types_spirvdis.ll @@ -6,39 +6,51 @@ define void @main() { entry: ; CHECK: %int16_t_Val = OpVariable %_ptr_Function_ushort Function %int16_t_Val = alloca i16, align 2 + store i16 0, i16* %int16_t_Val, align 2 ; CHECK: %int_Val = OpVariable %_ptr_Function_uint Function %int_Val = alloca i32, align 4 + store i32 0, i32* %int_Val, align 4 ; CHECK: %int64_t_Val = OpVariable %_ptr_Function_ulong Function %int64_t_Val = alloca i64, align 8 + store i64 0, i64* %int64_t_Val, align 8 ; CHECK: %int16_t2_Val = OpVariable %_ptr_Function_v2ushort Function %int16_t2_Val = alloca <2 x i16>, align 4 + store <2 x i16> zeroinitializer, <2 x i16>* %int16_t2_Val, align 4 ; CHECK: %int16_t3_Val = OpVariable %_ptr_Function_v3ushort Function %int16_t3_Val = alloca <3 x i16>, align 8 + store <3 x i16> zeroinitializer, <3 x i16>* %int16_t3_Val, align 8 ; CHECK: %int16_t4_Val = OpVariable %_ptr_Function_v4ushort Function %int16_t4_Val = alloca <4 x i16>, align 8 + store <4 x i16> zeroinitializer, <4 x i16>* %int16_t4_Val, align 8 ; CHECK: %int2_Val = OpVariable %_ptr_Function_v2uint Function %int2_Val = alloca <2 x i32>, align 8 + store <2 x i32> zeroinitializer, <2 x i32>* %int2_Val, align 8 ; CHECK: %int3_Val = OpVariable %_ptr_Function_v3uint Function %int3_Val = alloca <3 x i32>, align 16 + store <3 x i32> zeroinitializer, <3 x i32>* %int3_Val, align 16 ; CHECK: %int4_Val = OpVariable %_ptr_Function_v4uint Function %int4_Val = alloca <4 x i32>, align 16 + store <4 x i32> zeroinitializer, <4 x i32>* %int4_Val, align 16 ; CHECK: %int64_t2_Val = OpVariable %_ptr_Function_v2ulong Function %int64_t2_Val = alloca <2 x i64>, align 16 + store <2 x i64> zeroinitializer, <2 x i64>* %int64_t2_Val, align 16 ; CHECK: %int64_t3_Val = OpVariable %_ptr_Function_v3ulong Function %int64_t3_Val = alloca <3 x i64>, align 32 + store <3 x i64> zeroinitializer, <3 x i64>* %int64_t3_Val, align 32 ; CHECK: %int64_t4_Val = OpVariable %_ptr_Function_v4ulong Function %int64_t4_Val = alloca <4 x i64>, align 32 + store <4 x i64> zeroinitializer, <4 x i64>* %int64_t4_Val, align 32 ret void } diff --git a/llvm/test/CodeGen/SPIRV/builtin_intrinsics_32.ll b/llvm/test/CodeGen/SPIRV/builtin_intrinsics_32.ll index 39a755e736081..bca90f4ebd151 100644 --- a/llvm/test/CodeGen/SPIRV/builtin_intrinsics_32.ll +++ b/llvm/test/CodeGen/SPIRV/builtin_intrinsics_32.ll @@ -33,6 +33,28 @@ target triple = "spirv32-unknown-unknown" ; CHECK: [[SubgroupId]] = OpVariable [[I32PTR]] Input ; CHECK: [[SubgroupLocalInvocationId]] = OpVariable [[I32PTR]] Input +@G_spv_num_workgroups_0 = global i32 0 +@G_spv_num_workgroups_1 = global i32 0 +@G_spv_num_workgroups_2 = global i32 0 +@G_spv_workgroup_size_0 = global i32 0 +@G_spv_workgroup_size_1 = global i32 0 +@G_spv_workgroup_size_2 = global i32 0 +@G_spv_group_id_0 = global i32 0 +@G_spv_group_id_1 = global i32 0 +@G_spv_group_id_2 = global i32 0 +@G_spv_thread_id_in_group_0 = global i32 0 +@G_spv_thread_id_in_group_1 = global i32 0 +@G_spv_thread_id_in_group_2 = global i32 0 +@G_spv_thread_id_0 = global i32 0 +@G_spv_thread_id_1 = global i32 0 +@G_spv_thread_id_2 = global i32 0 +@G_spv_global_size_0 = global i32 0 +@G_spv_global_size_1 = global i32 0 +@G_spv_global_size_2 = global i32 0 +@G_spv_global_offset_0 = global i32 0 +@G_spv_global_offset_1 = global i32 0 +@G_spv_global_offset_2 = global i32 0 + ; Function Attrs: convergent noinline norecurse nounwind optnone define spir_func void @test_id_and_range() { entry: @@ -44,66 +66,87 @@ entry: ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[NumWorkgroups]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 0 %spv.num.workgroups = call i32 @llvm.spv.num.workgroups.i32(i32 0) + store i32 %spv.num.workgroups, i32* @G_spv_num_workgroups_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[NumWorkgroups]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 1 %spv.num.workgroups1 = call i32 @llvm.spv.num.workgroups.i32(i32 1) + store i32 %spv.num.workgroups1, i32* @G_spv_num_workgroups_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[NumWorkgroups]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 2 %spv.num.workgroups2 = call i32 @llvm.spv.num.workgroups.i32(i32 2) + store i32 %spv.num.workgroups2, i32* @G_spv_num_workgroups_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[WorkgroupSize]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 0 %spv.workgroup.size = call i32 @llvm.spv.workgroup.size.i32(i32 0) + store i32 %spv.workgroup.size, i32* @G_spv_workgroup_size_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[WorkgroupSize]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 1 %spv.workgroup.size3 = call i32 @llvm.spv.workgroup.size.i32(i32 1) + store i32 %spv.workgroup.size3, i32* @G_spv_workgroup_size_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[WorkgroupSize]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 2 %spv.workgroup.size4 = call i32 @llvm.spv.workgroup.size.i32(i32 2) + store i32 %spv.workgroup.size4, i32* @G_spv_workgroup_size_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[WorkgroupId]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 0 %spv.group.id = call i32 @llvm.spv.group.id.i32(i32 0) + store i32 %spv.group.id, i32* @G_spv_group_id_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[WorkgroupId]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 1 %spv.group.id5 = call i32 @llvm.spv.group.id.i32(i32 1) + store i32 %spv.group.id5, i32* @G_spv_group_id_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[WorkgroupId]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 2 %spv.group.id6 = call i32 @llvm.spv.group.id.i32(i32 2) + store i32 %spv.group.id6, i32* @G_spv_group_id_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[LocalInvocationId]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 0 %spv.thread.id.in.group = call i32 @llvm.spv.thread.id.in.group.i32(i32 0) + store i32 %spv.thread.id.in.group, i32* @G_spv_thread_id_in_group_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[LocalInvocationId]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 1 %spv.thread.id.in.group7 = call i32 @llvm.spv.thread.id.in.group.i32(i32 1) + store i32 %spv.thread.id.in.group7, i32* @G_spv_thread_id_in_group_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[LocalInvocationId]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 2 %spv.thread.id.in.group8 = call i32 @llvm.spv.thread.id.in.group.i32(i32 2) + store i32 %spv.thread.id.in.group8, i32* @G_spv_thread_id_in_group_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[GlobalInvocationId]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 0 %spv.thread.id = call i32 @llvm.spv.thread.id.i32(i32 0) + store i32 %spv.thread.id, i32* @G_spv_thread_id_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[GlobalInvocationId]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 1 %spv.thread.id9 = call i32 @llvm.spv.thread.id.i32(i32 1) + store i32 %spv.thread.id9, i32* @G_spv_thread_id_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[GlobalInvocationId]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 2 %spv.thread.id10 = call i32 @llvm.spv.thread.id.i32(i32 2) + store i32 %spv.thread.id10, i32* @G_spv_thread_id_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[GlobalSize]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 0 %spv.num.workgroups11 = call i32 @llvm.spv.global.size.i32(i32 0) + store i32 %spv.num.workgroups11, i32* @G_spv_global_size_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[GlobalSize]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 1 %spv.num.workgroups12 = call i32 @llvm.spv.global.size.i32(i32 1) + store i32 %spv.num.workgroups12, i32* @G_spv_global_size_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[GlobalSize]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 2 %spv.num.workgroups13 = call i32 @llvm.spv.global.size.i32(i32 2) + store i32 %spv.num.workgroups13, i32* @G_spv_global_size_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[GlobalOffset]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 0 %spv.global.offset = call i32 @llvm.spv.global.offset.i32(i32 0) + store i32 %spv.global.offset, i32* @G_spv_global_offset_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[GlobalOffset]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 1 %spv.global.offset14 = call i32 @llvm.spv.global.offset.i32(i32 1) + store i32 %spv.global.offset14, i32* @G_spv_global_offset_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I32V3]] [[GlobalOffset]] ; CHECK: OpCompositeExtract [[I32]] [[LD]] 2 %spv.global.offset15 = call i32 @llvm.spv.global.offset.i32(i32 2) + store i32 %spv.global.offset15, i32* @G_spv_global_offset_2 ; CHECK: OpLoad %5 [[SubgroupSize]] %0 = call i32 @llvm.spv.subgroup.size() store i32 %0, ptr %ssize, align 4 diff --git a/llvm/test/CodeGen/SPIRV/builtin_intrinsics_64.ll b/llvm/test/CodeGen/SPIRV/builtin_intrinsics_64.ll index dcdf8992ce1c4..26c2d866d14c7 100644 --- a/llvm/test/CodeGen/SPIRV/builtin_intrinsics_64.ll +++ b/llvm/test/CodeGen/SPIRV/builtin_intrinsics_64.ll @@ -34,6 +34,28 @@ target triple = "spirv64-unknown-unknown" ; CHECK: [[SubgroupId]] = OpVariable [[I32PTR]] Input ; CHECK: [[SubgroupLocalInvocationId]] = OpVariable [[I32PTR]] Input +@G_spv_num_workgroups_0 = global i64 0 +@G_spv_num_workgroups_1 = global i64 0 +@G_spv_num_workgroups_2 = global i64 0 +@G_spv_workgroup_size_0 = global i64 0 +@G_spv_workgroup_size_1 = global i64 0 +@G_spv_workgroup_size_2 = global i64 0 +@G_spv_group_id_0 = global i64 0 +@G_spv_group_id_1 = global i64 0 +@G_spv_group_id_2 = global i64 0 +@G_spv_thread_id_in_group_0 = global i64 0 +@G_spv_thread_id_in_group_1 = global i64 0 +@G_spv_thread_id_in_group_2 = global i64 0 +@G_spv_thread_id_0 = global i64 0 +@G_spv_thread_id_1 = global i64 0 +@G_spv_thread_id_2 = global i64 0 +@G_spv_global_size_0 = global i64 0 +@G_spv_global_size_1 = global i64 0 +@G_spv_global_size_2 = global i64 0 +@G_spv_global_offset_0 = global i64 0 +@G_spv_global_offset_1 = global i64 0 +@G_spv_global_offset_2 = global i64 0 + ; Function Attrs: convergent noinline norecurse nounwind optnone define spir_func void @test_id_and_range() { entry: @@ -45,66 +67,87 @@ entry: ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[NumWorkgroups]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 0 %spv.num.workgroups = call i64 @llvm.spv.num.workgroups.i64(i32 0) + store i64 %spv.num.workgroups, i64* @G_spv_num_workgroups_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[NumWorkgroups]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 1 %spv.num.workgroups1 = call i64 @llvm.spv.num.workgroups.i64(i32 1) + store i64 %spv.num.workgroups1, i64* @G_spv_num_workgroups_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[NumWorkgroups]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 2 %spv.num.workgroups2 = call i64 @llvm.spv.num.workgroups.i64(i32 2) + store i64 %spv.num.workgroups2, i64* @G_spv_num_workgroups_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[WorkgroupSize]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 0 %spv.workgroup.size = call i64 @llvm.spv.workgroup.size.i64(i32 0) + store i64 %spv.workgroup.size, i64* @G_spv_workgroup_size_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[WorkgroupSize]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 1 %spv.workgroup.size3 = call i64 @llvm.spv.workgroup.size.i64(i32 1) + store i64 %spv.workgroup.size3, i64* @G_spv_workgroup_size_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[WorkgroupSize]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 2 %spv.workgroup.size4 = call i64 @llvm.spv.workgroup.size.i64(i32 2) + store i64 %spv.workgroup.size4, i64* @G_spv_workgroup_size_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[WorkgroupId]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 0 %spv.group.id = call i64 @llvm.spv.group.id.i64(i32 0) + store i64 %spv.group.id, i64* @G_spv_group_id_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[WorkgroupId]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 1 %spv.group.id5 = call i64 @llvm.spv.group.id.i64(i32 1) + store i64 %spv.group.id5, i64* @G_spv_group_id_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[WorkgroupId]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 2 %spv.group.id6 = call i64 @llvm.spv.group.id.i64(i32 2) + store i64 %spv.group.id6, i64* @G_spv_group_id_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[LocalInvocationId]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 0 %spv.thread.id.in.group = call i64 @llvm.spv.thread.id.in.group.i64(i32 0) + store i64 %spv.thread.id.in.group, i64* @G_spv_thread_id_in_group_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[LocalInvocationId]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 1 %spv.thread.id.in.group7 = call i64 @llvm.spv.thread.id.in.group.i64(i32 1) + store i64 %spv.thread.id.in.group7, i64* @G_spv_thread_id_in_group_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[LocalInvocationId]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 2 %spv.thread.id.in.group8 = call i64 @llvm.spv.thread.id.in.group.i64(i32 2) + store i64 %spv.thread.id.in.group8, i64* @G_spv_thread_id_in_group_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[GlobalInvocationId]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 0 %spv.thread.id = call i64 @llvm.spv.thread.id.i64(i32 0) + store i64 %spv.thread.id, i64* @G_spv_thread_id_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[GlobalInvocationId]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 1 %spv.thread.id9 = call i64 @llvm.spv.thread.id.i64(i32 1) + store i64 %spv.thread.id9, i64* @G_spv_thread_id_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[GlobalInvocationId]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 2 %spv.thread.id10 = call i64 @llvm.spv.thread.id.i64(i32 2) + store i64 %spv.thread.id10, i64* @G_spv_thread_id_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[GlobalSize]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 0 %spv.num.workgroups11 = call i64 @llvm.spv.global.size.i64(i32 0) + store i64 %spv.num.workgroups11, i64* @G_spv_global_size_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[GlobalSize]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 1 %spv.num.workgroups12 = call i64 @llvm.spv.global.size.i64(i32 1) + store i64 %spv.num.workgroups12, i64* @G_spv_global_size_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[GlobalSize]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 2 %spv.num.workgroups13 = call i64 @llvm.spv.global.size.i64(i32 2) + store i64 %spv.num.workgroups13, i64* @G_spv_global_size_2 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[GlobalOffset]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 0 %spv.global.offset = call i64 @llvm.spv.global.offset.i64(i32 0) + store i64 %spv.global.offset, i64* @G_spv_global_offset_0 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[GlobalOffset]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 1 %spv.global.offset14 = call i64 @llvm.spv.global.offset.i64(i32 1) + store i64 %spv.global.offset14, i64* @G_spv_global_offset_1 ; CHECK: [[LD:%[0-9]*]] = OpLoad [[I64V3]] [[GlobalOffset]] ; CHECK: OpCompositeExtract [[I64]] [[LD]] 2 %spv.global.offset15 = call i64 @llvm.spv.global.offset.i64(i32 2) + store i64 %spv.global.offset15, i64* @G_spv_global_offset_2 ; CHECK: OpLoad %5 [[SubgroupSize]] %0 = call i32 @llvm.spv.subgroup.size() store i32 %0, ptr %ssize, align 4 diff --git a/llvm/test/CodeGen/SPIRV/builtin_vars-decorate.ll b/llvm/test/CodeGen/SPIRV/builtin_vars-decorate.ll index 0c9b29de890d4..8dd9b387a6d84 100644 --- a/llvm/test/CodeGen/SPIRV/builtin_vars-decorate.ll +++ b/llvm/test/CodeGen/SPIRV/builtin_vars-decorate.ll @@ -81,17 +81,36 @@ @__spirv_BuiltInSubgroupId = external addrspace(1) global i32 @__spirv_BuiltInSubgroupLocalInvocationId = external addrspace(1) global i32 +@G_r1 = global i64 0 +@G_r2 = global i64 0 +@G_r3 = global i32 0 +@G_r4 = global i32 0 +@G_r5 = global i32 0 +@G_r6 = global i32 0 +@G_r7 = global i32 0 +@G_r8 = global i32 0 +@G_r9 = global i32 0 + define spir_kernel void @_Z1wv() { entry: %r1 = tail call spir_func i64 @get_global_linear_id() + store i64 %r1, i64* @G_r1 %r2 = tail call spir_func i64 @get_local_linear_id() + store i64 %r2, i64* @G_r2 %r3 = tail call spir_func i32 @get_work_dim() + store i32 %r3, i32* @G_r3 %r4 = tail call spir_func i32 @get_sub_group_size() + store i32 %r4, i32* @G_r4 %r5 = tail call spir_func i32 @get_max_sub_group_size() + store i32 %r5, i32* @G_r5 %r6 = tail call spir_func i32 @get_num_sub_groups() + store i32 %r6, i32* @G_r6 %r7 = tail call spir_func i32 @get_enqueued_num_sub_groups() + store i32 %r7, i32* @G_r7 %r8 = tail call spir_func i32 @get_sub_group_id() + store i32 %r8, i32* @G_r8 %r9 = tail call spir_func i32 @get_sub_group_local_id() + store i32 %r9, i32* @G_r9 ret void } diff --git a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll index 04a8aac9a690d..1c2954f382f4b 100644 --- a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll +++ b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll @@ -126,6 +126,7 @@ define spir_func i32 @test0() !dbg !17 { %14 = load ptr addrspace(4), ptr %11, align 4, !dbg !65 store ptr addrspace(4) %14, ptr %12, align 4, !dbg !64 #dbg_declare(ptr %13, !66, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !70) + store [8 x i32] zeroinitializer, ptr %13, align 4 ret i32 0, !dbg !71 } @@ -169,6 +170,7 @@ define spir_func i32 @test1() !dbg !72 { %14 = load ptr addrspace(4), ptr %11, align 4, !dbg !97 store ptr addrspace(4) %14, ptr %12, align 4, !dbg !96 #dbg_declare(ptr %13, !98, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !99) + store [8 x i32] zeroinitializer, ptr %13, align 4 ret i32 0, !dbg !100 } diff --git a/llvm/test/CodeGen/SPIRV/event-zero-const.ll b/llvm/test/CodeGen/SPIRV/event-zero-const.ll index 523d2ad9825f3..2bf8259e78785 100644 --- a/llvm/test/CodeGen/SPIRV/event-zero-const.ll +++ b/llvm/test/CodeGen/SPIRV/event-zero-const.ll @@ -12,11 +12,15 @@ ; CHECK: OpINotEqual %[[#]] %[[#]] %[[#LongNull]] ; CHECK: OpGroupAsyncCopy %[[#EventTy]] %[[#]] %[[#]] %[[#]] %[[#]] %[[#]] %[[#EventNull]] +@G_r1 = global i1 0 +@G_e1 = global target("spirv.Event") poison define weak_odr dso_local spir_kernel void @foo(i64 %_arg_i, ptr addrspace(1) %_arg_ptr, ptr addrspace(3) %_arg_local) { entry: %r1 = icmp ne i64 %_arg_i, 0 + store i1 %r1, ptr @G_r1 %e1 = tail call spir_func target("spirv.Event") @__spirv_GroupAsyncCopy(i32 2, ptr addrspace(3) %_arg_local, ptr addrspace(1) %_arg_ptr, i64 1, i64 1, target("spirv.Event") zeroinitializer) + store target("spirv.Event") %e1, ptr @G_e1 ret void } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll index e5736b88b63a3..a9a0d3358f8cc 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll @@ -11,15 +11,22 @@ @G1 = addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr @bar to ptr addrspace(4))] } @G2 = addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) addrspacecast (ptr null to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr @bar to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] } +@G_r1_foo = global ptr addrspace(4) null +@G_r2_foo = global ptr addrspace(4) null +@G_r1_bar = global ptr addrspace(4) null + define void @foo(ptr addrspace(4) %p) { entry: %r1 = addrspacecast ptr @foo to ptr addrspace(4) + store ptr addrspace(4) %r1, ptr @G_r1_foo %r2 = addrspacecast ptr null to ptr addrspace(4) + store ptr addrspace(4) %r2, ptr @G_r2_foo ret void } define void @bar(ptr addrspace(4) %p) { entry: %r1 = addrspacecast ptr @bar to ptr addrspace(4) + store ptr addrspace(4) %r1, ptr @G_r1_bar ret void } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16.ll index 22668e71fb257..92652f1faefc0 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16.ll @@ -12,11 +12,16 @@ target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" +@G1 = global bfloat 0.0 +@G2 = global <2 x bfloat> zeroinitializer + define spir_kernel void @test() { entry: %addr1 = alloca bfloat %addr2 = alloca <2 x bfloat> %data1 = load bfloat, ptr %addr1 %data2 = load <2 x bfloat>, ptr %addr2 + store bfloat %data1, ptr @G1 + store <2 x bfloat> %data2, ptr @G2 ret void } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_float_controls2/decoration.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_float_controls2/decoration.ll index d3fe9e43450cd..81497f26f1aef 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_float_controls2/decoration.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_float_controls2/decoration.ll @@ -79,6 +79,54 @@ ; CHECK: OpDecorate %[[#maxResV]] FPFastMathMode NotNaN|NotInf|NSZ|AllowRecip|AllowContract|AllowReassoc|AllowTransform ; CHECK: OpDecorate %[[#maxCommonResV]] FPFastMathMode NotNaN|NotInf +@G_addRes = global float 0.0 +@G_subRes = global float 0.0 +@G_mulRes = global float 0.0 +@G_divRes = global float 0.0 +@G_remRes = global float 0.0 +@G_negRes = global float 0.0 +@G_oeqRes = global i1 0 +@G_oneRes = global i1 0 +@G_oltRes = global i1 0 +@G_ogtRes = global i1 0 +@G_oleRes = global i1 0 +@G_ogeRes = global i1 0 +@G_ordRes = global i1 0 +@G_ueqRes = global i1 0 +@G_uneRes = global i1 0 +@G_ultRes = global i1 0 +@G_ugtRes = global i1 0 +@G_uleRes = global i1 0 +@G_ugeRes = global i1 0 +@G_unoRes = global i1 0 +@G_modRes = global float 0.0 +@G_maxRes = global float 0.0 +@G_maxCommonRes = global float 0.0 + +@G_addResV = global <2 x float> zeroinitializer +@G_subResV = global <2 x float> zeroinitializer +@G_mulResV = global <2 x float> zeroinitializer +@G_divResV = global <2 x float> zeroinitializer +@G_remResV = global <2 x float> zeroinitializer +@G_negResV = global <2 x float> zeroinitializer +@G_oeqResV = global <2 x i1> zeroinitializer +@G_oneResV = global <2 x i1> zeroinitializer +@G_oltResV = global <2 x i1> zeroinitializer +@G_ogtResV = global <2 x i1> zeroinitializer +@G_oleResV = global <2 x i1> zeroinitializer +@G_ogeResV = global <2 x i1> zeroinitializer +@G_ordResV = global <2 x i1> zeroinitializer +@G_ueqResV = global <2 x i1> zeroinitializer +@G_uneResV = global <2 x i1> zeroinitializer +@G_ultResV = global <2 x i1> zeroinitializer +@G_ugtResV = global <2 x i1> zeroinitializer +@G_uleResV = global <2 x i1> zeroinitializer +@G_ugeResV = global <2 x i1> zeroinitializer +@G_unoResV = global <2 x i1> zeroinitializer +@G_modResV = global <2 x float> zeroinitializer +@G_maxResV = global <2 x float> zeroinitializer +@G_maxCommonResV = global <2 x float> zeroinitializer + ; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none) declare spir_func float @_Z4fmodff(float, float) declare dso_local spir_func noundef nofpclass(nan inf) float @_Z16__spirv_ocl_fmaxff(float noundef nofpclass(nan inf), float noundef nofpclass(nan inf)) local_unnamed_addr #1 @@ -91,55 +139,101 @@ declare dso_local spir_func noundef nofpclass(nan inf) <2 x float> @_Z23__spirv_ define weak_odr dso_local spir_kernel void @foo(float %1, float %2) { entry: %addRes = fadd float %1, %2 + store float %addRes, float* @G_addRes %subRes = fsub nnan float %1, %2 + store float %subRes, float* @G_subRes %mulRes = fmul ninf float %1, %2 + store float %mulRes, float* @G_mulRes %divRes = fdiv nsz float %1, %2 + store float %divRes, float* @G_divRes %remRes = frem arcp float %1, %2 + store float %remRes, float* @G_remRes %negRes = fneg fast float %1 + store float %negRes, float* @G_negRes %oeqRes = fcmp nnan ninf oeq float %1, %2 + store i1 %oeqRes, i1* @G_oeqRes %oneRes = fcmp one float %1, %2, !spirv.Decorations !3 + store i1 %oneRes, i1* @G_oneRes %oltRes = fcmp nnan olt float %1, %2, !spirv.Decorations !3 + store i1 %oltRes, i1* @G_oltRes %ogtRes = fcmp ninf ogt float %1, %2, !spirv.Decorations !3 + store i1 %ogtRes, i1* @G_ogtRes %oleRes = fcmp nsz ole float %1, %2, !spirv.Decorations !3 + store i1 %oleRes, i1* @G_oleRes %ogeRes = fcmp arcp oge float %1, %2, !spirv.Decorations !3 + store i1 %ogeRes, i1* @G_ogeRes %ordRes = fcmp fast ord float %1, %2, !spirv.Decorations !3 + store i1 %ordRes, i1* @G_ordRes %ueqRes = fcmp nnan ninf ueq float %1, %2, !spirv.Decorations !3 + store i1 %ueqRes, i1* @G_ueqRes %uneRes = fcmp une float %1, %2, !spirv.Decorations !3 + store i1 %uneRes, i1* @G_uneRes %ultRes = fcmp ult float %1, %2, !spirv.Decorations !3 + store i1 %ultRes, i1* @G_ultRes %ugtRes = fcmp ugt float %1, %2, !spirv.Decorations !3 + store i1 %ugtRes, i1* @G_ugtRes %uleRes = fcmp ule float %1, %2, !spirv.Decorations !3 + store i1 %uleRes, i1* @G_uleRes %ugeRes = fcmp uge float %1, %2, !spirv.Decorations !3 + store i1 %ugeRes, i1* @G_ugeRes %unoRes = fcmp uno float %1, %2, !spirv.Decorations !3 + store i1 %unoRes, i1* @G_unoRes %modRes = call spir_func float @_Z4fmodff(float %1, float %2) + store float %modRes, float* @G_modRes %maxRes = tail call fast spir_func noundef nofpclass(nan inf) float @_Z16__spirv_ocl_fmaxff(float noundef nofpclass(nan inf) %1, float noundef nofpclass(nan inf) %2) + store float %maxRes, float* @G_maxRes %maxCommonRes = tail call spir_func noundef float @_Z23__spirv_ocl_fmax_commonff(float noundef nofpclass(nan inf) %1, float noundef nofpclass(nan inf) %2) + store float %maxCommonRes, float* @G_maxCommonRes ret void } define weak_odr dso_local spir_kernel void @fooV(<2 x float> %v1, <2 x float> %v2) { %addResV = fadd <2 x float> %v1, %v2 + store <2 x float> %addResV, <2 x float>* @G_addResV %subResV = fsub nnan <2 x float> %v1, %v2 + store <2 x float> %subResV, <2 x float>* @G_subResV %mulResV = fmul ninf <2 x float> %v1, %v2 + store <2 x float> %mulResV, <2 x float>* @G_mulResV %divResV = fdiv nsz <2 x float> %v1, %v2 + store <2 x float> %divResV, <2 x float>* @G_divResV %remResV = frem arcp <2 x float> %v1, %v2 + store <2 x float> %remResV, <2 x float>* @G_remResV %negResV = fneg fast <2 x float> %v1 + store <2 x float> %negResV, <2 x float>* @G_negResV %oeqResV = fcmp nnan ninf oeq <2 x float> %v1, %v2 + store <2 x i1> %oeqResV, <2 x i1>* @G_oeqResV %oneResV = fcmp one <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %oneResV, <2 x i1>* @G_oneResV %oltResV = fcmp nnan olt <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %oltResV, <2 x i1>* @G_oltResV %ogtResV = fcmp ninf ogt <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %ogtResV, <2 x i1>* @G_ogtResV %oleResV = fcmp nsz ole <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %oleResV, <2 x i1>* @G_oleResV %ogeResV = fcmp arcp oge <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %ogeResV, <2 x i1>* @G_ogeResV %ordResV = fcmp fast ord <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %ordResV, <2 x i1>* @G_ordResV %ueqResV = fcmp nnan ninf ueq <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %ueqResV, <2 x i1>* @G_ueqResV %uneResV = fcmp une <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %uneResV, <2 x i1>* @G_uneResV %ultResV = fcmp ult <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %ultResV, <2 x i1>* @G_ultResV %ugtResV = fcmp ugt <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %ugtResV, <2 x i1>* @G_ugtResV %uleResV = fcmp ule <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %uleResV, <2 x i1>* @G_uleResV %ugeResV = fcmp uge <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %ugeResV, <2 x i1>* @G_ugeResV %unoResV = fcmp uno <2 x float> %v1, %v2, !spirv.Decorations !3 + store <2 x i1> %unoResV, <2 x i1>* @G_unoResV %modResV = call spir_func <2 x float> @_Z4fmodDv2_fDv2_f(<2 x float> %v1, <2 x float> %v2) + store <2 x float> %modResV, <2 x float>* @G_modResV %maxResV = tail call fast spir_func noundef nofpclass(nan inf) <2 x float> @_Z16__spirv_ocl_fmaxDv2_fDv2_f(<2 x float> noundef nofpclass(nan inf) %v1, <2 x float> noundef nofpclass(nan inf) %v2) + store <2 x float> %maxResV, <2 x float>* @G_maxResV %maxCommonResV = tail call spir_func noundef <2 x float> @_Z23__spirv_ocl_fmax_commonDv2_fDv2_f(<2 x float> noundef nofpclass(nan inf) %v1, <2 x float> noundef nofpclass(nan inf) %v2) + store <2 x float> %maxCommonResV, <2 x float>* @G_maxCommonResV ret void } diff --git a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll index 4db0ba33d52c9..face4a9f5e615 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll @@ -2,10 +2,15 @@ ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=KHR %s -o - | FileCheck %s ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=khr %s -o - | FileCheck %s +@G = global i32 0 + define i6 @foo() { %call = tail call i32 @llvm.bitreverse.i32(i32 42) + store i32 %call, ptr @G ret i6 2 } ; CHECK-NOT: OpExtension "SPV_INTEL_arbitrary_precision_integers" ; CHECK-DAG: OpExtension "SPV_KHR_bit_instructions" + +declare i32 @llvm.bitreverse.i32(i32) diff --git a/llvm/test/CodeGen/SPIRV/freeze.ll b/llvm/test/CodeGen/SPIRV/freeze.ll index 9077d2ede72a9..4f7e7794ed03b 100644 --- a/llvm/test/CodeGen/SPIRV/freeze.ll +++ b/llvm/test/CodeGen/SPIRV/freeze.ll @@ -1,15 +1,15 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; CHECK: OpName %[[Arg1:.*]] "arg1" -; CHECK: OpName %[[Arg2:.*]] "arg2" -; CHECK: OpName %[[NotAStaticPoison:.*]] "poison1" -; CHECK: OpName %[[NotAStaticPoison]] "nil0" -; CHECK: OpName %[[StaticPoisonIntFreeze:.*]] "nil1" -; CHECK: OpName %[[StaticPoisonFloatFreeze:.*]] "nil2" -; CHECK: OpName %[[Arg1]] "val1" -; CHECK: OpName %[[Const100:.*]] "val2" -; CHECK: OpName %[[Const100]] "val3" +; CHECK-DAG: OpName %[[Arg1:.*]] "arg1" +; CHECK-DAG: OpName %[[Arg2:.*]] "arg2" +; CHECK-DAG: OpName %[[NotAStaticPoison:.*]] "poison1" +; CHECK-DAG: OpName %[[NotAStaticPoison]] "nil0" +; CHECK-DAG: OpName %[[StaticPoisonIntFreeze:.*]] "nil1" +; CHECK-DAG: OpName %[[StaticPoisonFloatFreeze:.*]] "nil2" +; CHECK-DAG: OpName %[[Arg1]] "val1" +; CHECK-DAG: OpName %[[Const100:.*]] "val2" +; CHECK-DAG: OpName %[[Const100]] "val3" ; CHECK: OpDecorate ; CHECK-DAG: %[[FloatTy:.*]] = OpTypeFloat 32 ; CHECK-DAG: %[[ShortTy:.*]] = OpTypeInt 16 0 @@ -18,17 +18,37 @@ ; CHECK-DAG: %[[Undef32:.*]] = OpUndef %[[IntTy]] ; CHECK-DAG: %[[UndefFloat:.*]] = OpUndef %[[FloatTy]] ; CHECK-DAG: %[[Const100]] = OpConstant %[[IntTy]] 100 -; CHECK: %[[Arg1]] = OpFunctionParameter %[[FloatTy]] -; CHECK: %[[NotAStaticPoison]] = OpIAdd %[[ShortTy]] %[[Arg2]] %[[Undef16]] -define spir_func void @foo(float %arg1, i16 %arg2) { +define spir_func i16 @test_nil0(i16 %arg2) { entry: +; CHECK: %[[NotAStaticPoison]] = OpIAdd %[[ShortTy]] %[[Arg2]] %[[Undef16]] %poison1 = add i16 %arg2, undef %nil0 = freeze i16 %poison1 + ret i16 %nil0 +} + +define spir_func i32 @test_nil1() { +entry: %nil1 = freeze i32 undef + ret i32 %nil1 +} + +define spir_func float @test_nil2() { +entry: %nil2 = freeze float poison + ret float %nil2 +} + +define spir_func float @freeze_float(float %arg1) { +entry: +; CHECK: %[[Arg1]] = OpFunctionParameter %[[FloatTy]] %val1 = freeze float %arg1 + ret float %val1 +} + +define spir_func i32 @foo() { +entry: %val2 = freeze i32 100 %val3 = freeze i32 %val2 - ret void + ret i32 %val3 } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll index a97492b8453ea..a15d628cc3614 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll @@ -63,7 +63,7 @@ entry: ; CHECK: %[[#a_high:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#a]] %[[#undef_v4i32]] 1 3 ; CHECK: %[[#b_low:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#b]] %[[#undef_v4i32]] 0 2 ; CHECK: %[[#b_high:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#b]] %[[#undef_v4i32]] 1 3 -; CHECK: %[[#iaddcarry:]] = OpIAddCarry %[[#struct_v2i32_v2i32]] %[[#a_low]] %[[#vec2_int_32]] +; CHECK: %[[#iaddcarry:]] = OpIAddCarry %[[#struct_v2i32_v2i32]] %[[#a_low]] %[[#b_low]] ; CHECK: %[[#lowsum:]] = OpCompositeExtract %[[#vec2_int_32]] %[[#iaddcarry]] 0 ; CHECK: %[[#carry:]] = OpCompositeExtract %[[#vec2_int_32]] %[[#iaddcarry]] 1 ; CHECK: %[[#carry_ne0:]] = OpINotEqual %[[#vec2_bool]] %[[#carry]] %[[#const_v2i32_0_0]] diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/abs.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/abs.ll index 4a15fa8b14537..75fac211f1108 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/abs.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/abs.ll @@ -3,24 +3,25 @@ ; CHECK: OpExtInstImport "GLSL.std.450" +@i = global i32 0, align 4 +@absi = global i32 0, align 4 +@f = global float 0.0, align 4 +@absf = global float 0.0, align 4 + define void @main() #1 { entry: - %i = alloca i32, align 4 - %absi = alloca i32, align 4 - %f = alloca float, align 4 - %absf = alloca float, align 4 - %0 = load i32, ptr %i, align 4 + %0 = load i32, ptr @i, align 4 ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] SAbs %[[#]] %elt.abs = call i32 @llvm.abs.i32(i32 %0, i1 false) - store i32 %elt.abs, ptr %absi, align 4 - %1 = load float, ptr %f, align 4 + store i32 %elt.abs, ptr @absi, align 4 + %1 = load float, ptr @f, align 4 ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FAbs %[[#]] %elt.abs1 = call float @llvm.fabs.f32(float %1) - store float %elt.abs1, ptr %absf, align 4 + store float %elt.abs1, ptr @absf, align 4 ret void } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log10.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log10.ll index 7583066c01cf8..dceaa8c209957 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log10.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log10.ll @@ -7,21 +7,23 @@ ; CHECK: %[[#v4float:]] = OpTypeVector %[[#float]] 4 ; CHECK: %[[#float_0_30103001:]] = OpConstant %[[#float]] 0.30103000998497009 +@logf = global float 0.0, align 4 +@logf4 = global <4 x float> zeroinitializer, align 16 + define void @main(float %f, <4 x float> %f4) { entry: ; CHECK-DAG: %[[#f:]] = OpFunctionParameter %[[#float]] ; CHECK-DAG: %[[#f4:]] = OpFunctionParameter %[[#v4float]] - %logf = alloca float, align 4 - %logf4 = alloca <4 x float>, align 16 - ; CHECK: %[[#log2:]] = OpExtInst %[[#float]] %[[#extinst]] Log2 %[[#f]] ; CHECK: %[[#res:]] = OpFMul %[[#float]] %[[#log2]] %[[#float_0_30103001]] %elt.log10 = call float @llvm.log10.f32(float %f) + store float %elt.log10, ptr @logf, align 4 ; CHECK: %[[#log2:]] = OpExtInst %[[#v4float]] %[[#extinst]] Log2 %[[#f4]] ; CHECK: %[[#res:]] = OpVectorTimesScalar %[[#v4float]] %[[#log2]] %[[#float_0_30103001]] %elt.log101 = call <4 x float> @llvm.log10.v4f32(<4 x float> %f4) + store <4 x float> %elt.log101, ptr @logf4, align 16 ret void } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-peeled-array-minimal.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-peeled-array-minimal.ll new file mode 100644 index 0000000000000..fc12f0f0592fe --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-peeled-array-minimal.ll @@ -0,0 +1,90 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpDecorate %[[ARRAY:[0-9]+]] ArrayStride 16 +; CHECK-DAG: OpMemberDecorate %[[CBLAYOUT:[0-9]+]] 0 Offset 0 +; CHECK-DAG: OpMemberDecorate %[[CBLAYOUT]] 1 Offset 52 +; CHECK-DAG: OpMemberDecorate %[[WRAPPER:[0-9]+]] 0 Offset 0 +; CHECK-DAG: OpDecorate %[[WRAPPER]] Block +; CHECK-DAG: OpMemberDecorate %[[STRUCT:[0-9]+]] 0 Offset 0 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_PAD:[0-9]+]] 0 Offset 0 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_PAD]] 1 Offset 4 + +; CHECK-DAG: %[[FLOAT:[0-9]+]] = OpTypeFloat 32 +; CHECK-DAG: %[[STRUCT]] = OpTypeStruct %[[FLOAT]] +; CHECK-DAG: %[[I8:[0-9]+]] = OpTypeInt 8 0 +; CHECK-DAG: %[[STRUCT_PAD]] = OpTypeStruct %[[STRUCT]] %[[I8]] +; CHECK-DAG: %[[UINT:[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: %[[CONST_4:[0-9]+]] = OpConstant %[[UINT]] 4 +; CHECK-DAG: %[[ARRAY]] = OpTypeArray %[[STRUCT_PAD]] %[[CONST_4]] +; CHECK-DAG: %[[CBLAYOUT]] = OpTypeStruct %[[ARRAY]] %[[FLOAT]] +; CHECK-DAG: %[[WRAPPER]] = OpTypeStruct %[[CBLAYOUT]] +; CHECK-DAG: %[[PTR_WRAPPER:[0-9]+]] = OpTypePointer Uniform %[[WRAPPER]] +; CHECK-DAG: %[[ZERO:[0-9]+]] = OpConstant %[[UINT]] 0 +; CHECK-DAG: %[[MYCBUFFER:[0-9]+]] = OpVariable %[[PTR_WRAPPER]] Uniform + +; CHECK-DAG: %[[I64:[0-9]+]] = OpTypeInt 64 0 +; CHECK-DAG: %[[STRUCT2:[0-9]+]] = OpTypeStruct %[[I64]] %[[UINT]] +; CHECK-DAG: %[[CONST_3:[0-9]+]] = OpConstant %[[UINT]] 3 +; CHECK-DAG: %[[ARRAY2:[0-9]+]] = OpTypeArray %[[STRUCT2]] %[[CONST_3]] +; CHECK-DAG: %[[CBLAYOUT2:[0-9]+]] = OpTypeStruct %[[ARRAY2]] %[[I64]] +; CHECK-DAG: %[[PTR_PRIVATE:[0-9]+]] = OpTypePointer Private %[[CBLAYOUT2]] +; CHECK-DAG: %[[MYPRIVATEVAR:[0-9]+]] = OpVariable %[[PTR_PRIVATE]] Private + +%__cblayout_MyCBuffer = type <{ <{ [3 x <{ %OrigType, target("spirv.Padding", 12) }>], %OrigType }>, float }> +%OrigType = type <{ float }> + +%__cblayout_MyCBuffer2 = type <{ [ 3 x <{ i64, i32 }> ], i64 }> + +@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison +@myPrivateVar = internal addrspace(10) global %__cblayout_MyCBuffer2 poison + +@myArray = external hidden local_unnamed_addr addrspace(12) global <{ [3 x <{ %OrigType, target("spirv.Padding", 12) }>], %OrigType }>, align 1 +@MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1 +@.str = private unnamed_addr constant [7 x i8] c"output\00", align 1 + +declare target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32, i32, i32, i32, ptr) + +define void @main() #1 { +entry: +; CHECK: %[[BUFFER_HANDLE:[0-9]+]] = OpCopyObject %[[PTR_WRAPPER]] %[[MYCBUFFER]] +; CHECK: %[[ACCESS_ARRAY:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[BUFFER_HANDLE]] %[[ZERO]] %[[ZERO]] + %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8 + + %0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_f32_5_2_0_0_2_1t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @.str) + %1 = tail call i32 @llvm.spv.thread.id.i32(i32 0) + %rem.i = and i32 %1, 3 + +; CHECK: %[[IDX_CONV:[0-9]+]] = OpUConvert {{.*}} + %idxprom.i = zext nneg i32 %rem.i to i64 + +; CHECK: %[[PTR_ELEM:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[ACCESS_ARRAY]] %[[IDX_CONV]] + %cbufferidx.i = getelementptr <{ %OrigType, target("spirv.Padding", 12) }>, ptr addrspace(12) @myArray, i64 %idxprom.i + +; CHECK: %[[PTR_FIELD:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[PTR_ELEM]] %[[ZERO]] %[[ZERO]] +; CHECK: %[[VAL_FLOAT:[0-9]+]] = OpLoad %[[FLOAT]] %[[PTR_FIELD]] Aligned 4 + %2 = load float, ptr addrspace(12) %cbufferidx.i, align 4 + + %val = load i64, ptr addrspace(10) getelementptr (%__cblayout_MyCBuffer2, ptr addrspace(10) @myPrivateVar, i32 0, i32 1), align 8 + %val.float = sitofp i64 %val to float + + %vecinit4.i = insertelement <4 x float> , float %2, i64 0 + %vecinit4.i.2 = insertelement <4 x float> %vecinit4.i, float %val.float, i64 1 + %3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_f32_5_2_0_0_2_1t(target("spirv.Image", float, 5, 2, 0, 0, 2, 1) %0, i32 0) + store <4 x float> %vecinit4.i.2, ptr addrspace(11) %3, align 16 +; CHECK: OpImageWrite {{%[0-9]+}} {{%[0-9]+}} {{%[0-9]+}} + ret void +} + +declare i32 @llvm.spv.thread.id.i32(i32) + +declare target("spirv.Image", float, 5, 2, 0, 0, 2, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_f32_5_2_0_0_2_1t(i32, i32, i32, i32, ptr) + +declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_f32_5_2_0_0_2_1t(target("spirv.Image", float, 5, 2, 0, 0, 2, 1), i32) + +attributes #1 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +!hlsl.cbs = !{!0} + +!0 = distinct !{ptr @MyCBuffer.cb, ptr addrspace(12) @myArray, null} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-peeled-array.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-peeled-array.ll new file mode 100644 index 0000000000000..fb93d53b337b3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-peeled-array.ll @@ -0,0 +1,74 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + + +; CHECK-DAG: %[[FLOAT:[0-9]+]] = OpTypeFloat 32 +; CHECK-DAG: %[[VEC3:[0-9]+]] = OpTypeVector %[[FLOAT]] 3 +; CHECK-DAG: %[[I8:[0-9]+]] = OpTypeInt 8 0 +; CHECK-DAG: %[[STRUCT_PAD:[0-9]+]] = OpTypeStruct %[[VEC3]] %[[I8]] +; CHECK-DAG: %[[UINT:[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: %[[CONST_3:[0-9]+]] = OpConstant %[[UINT]] 3 +; CHECK-DAG: %[[ARRAY:[0-9]+]] = OpTypeArray %[[STRUCT_PAD]] %[[CONST_3]] +; CHECK-DAG: %[[CBLAYOUT:[0-9]+]] = OpTypeStruct %[[ARRAY]] +; CHECK-DAG: OpMemberDecorate %[[CBLAYOUT]] 0 Offset 0 +; CHECK-DAG: %[[WRAPPER:[0-9]+]] = OpTypeStruct %[[CBLAYOUT]] +; CHECK-DAG: %[[PTR_WRAPPER:[0-9]+]] = OpTypePointer Uniform %[[WRAPPER]] +; CHECK-DAG: %[[ZERO:[0-9]+]] = OpConstant %[[UINT]] 0 +; CHECK-DAG: %[[MYCBUFFER:[0-9]+]] = OpVariable %[[PTR_WRAPPER]] Uniform + + +; TODO(168401): This array stride and offset of element 1 are incorrect. This +; is an issue with how 3 element vectors are handled. +; CHECK-DAG: OpDecorate %[[ARRAY]] ArrayStride 20 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_PAD]] 0 Offset 0 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_PAD]] 1 Offset 16 +; CHECK-DAG: OpMemberDecorate %[[WRAPPER]] 0 Offset 0 +; CHECK-DAG: OpDecorate %[[WRAPPER]] Block +%__cblayout_MyCBuffer = type <{ <{ [2 x <{ <3 x float>, target("spirv.Padding", 4) }>], <3 x float> }> }> + +@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison +@myArray = external hidden local_unnamed_addr addrspace(12) global <{ [2 x <{ <3 x float>, target("spirv.Padding", 4) }>], <3 x float> }>, align 16 +@MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1 +@.str = private unnamed_addr constant [7 x i8] c"output\00", align 1 + +declare target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32, i32, i32, i32, ptr) + +define void @main() #1 { +entry: +; CHECK: %[[BUFFER_HANDLE:[0-9]+]] = OpCopyObject %[[PTR_WRAPPER]] %[[MYCBUFFER]] +; CHECK: %[[ACCESS_ARRAY:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[BUFFER_HANDLE]] %[[ZERO]] %[[ZERO]] + %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8 + + %0 = tail call target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v3f32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %1 = tail call i32 @llvm.spv.thread.id.i32(i32 0) + +; CHECK: %[[IDX:[0-9]+]] = OpUMod %[[UINT]] {{%[0-9]+}} %[[CONST_3]] + %rem.i = urem i32 %1, 3 + +; CHECK: %[[IDX_CONV:[0-9]+]] = OpUConvert {{.*}} %[[IDX]] + %idxprom.i = zext nneg i32 %rem.i to i64 + +; CHECK: %[[PTR_ELEM:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[ACCESS_ARRAY]] %[[IDX_CONV]] + %cbufferidx.i = getelementptr <{ <3 x float>, target("spirv.Padding", 4) }>, ptr addrspace(12) @myArray, i64 %idxprom.i + +; CHECK: %[[PTR_FIELD:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[PTR_ELEM]] {{.*}} +; CHECK: %[[VAL_VEC3:[0-9]+]] = OpLoad %[[VEC3]] %[[PTR_FIELD]] Aligned 16 + %2 = load <3 x float>, ptr addrspace(12) %cbufferidx.i, align 16 + + %3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v3f32_12_1t(target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1) %0, i32 %1) + store <3 x float> %2, ptr addrspace(11) %3, align 16 + ret void +} + +declare i32 @llvm.spv.thread.id.i32(i32) + +declare target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v3f32_12_1t(i32, i32, i32, i32, ptr) + +declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v3f32_12_1t(target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1), i32) + +attributes #1 = { "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" } + +!hlsl.cbs = !{!0} + +!0 = !{ptr @MyCBuffer.cb, ptr addrspace(12) @myArray} diff --git a/llvm/test/CodeGen/SPIRV/instructions/insertvalue-undef-ptr.ll b/llvm/test/CodeGen/SPIRV/instructions/insertvalue-undef-ptr.ll index b788f34bf7238..02825e3cbb599 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/insertvalue-undef-ptr.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/insertvalue-undef-ptr.ll @@ -4,25 +4,40 @@ ; CHECK-LABEL: Begin function original_testcase define fastcc void @original_testcase() { top: + %0 = alloca [1 x ptr], align 4 ; CHECK: OpCompositeInsert - %0 = insertvalue [1 x ptr] zeroinitializer, ptr poison, 0 + %1 = insertvalue [1 x ptr] zeroinitializer, ptr poison, 0 + store [1 x ptr] %1, ptr %0 ret void } ; CHECK-LABEL: Begin function additional_testcases define fastcc void @additional_testcases() { top: + %0 = alloca [2 x ptr], align 4 + + ; Test with different pointer types ; CHECK: OpCompositeInsert %1 = insertvalue [1 x ptr] zeroinitializer, ptr undef, 0 + ; CHECK: OpStore + store [1 x ptr] %1, ptr %0 + ; CHECK-NEXT: OpCompositeInsert %2 = insertvalue {ptr, i32} zeroinitializer, ptr poison, 0 + ; CHECK: OpStore + store {ptr, i32} %2, ptr %0 + ; CHECK-NEXT: OpCompositeInsert %3 = insertvalue {ptr, ptr} undef, ptr null, 0 + ; CHECK: OpStore + store {ptr, ptr} %3, ptr %0 ; Test with undef aggregate ; CHECK-NEXT: OpCompositeInsert %4 = insertvalue [1 x ptr] undef, ptr undef, 0 + ; CHECK: OpStore + store [1 x ptr] %4, ptr %0 ret void } diff --git a/llvm/test/CodeGen/SPIRV/instructions/select-ptr-load.ll b/llvm/test/CodeGen/SPIRV/instructions/select-ptr-load.ll index 6e6cd2f68a971..510c7954c78f8 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/select-ptr-load.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/select-ptr-load.ll @@ -13,13 +13,18 @@ %struct = type { [3 x float] } +@G = global float 0.0 + define spir_kernel void @bar(i1 %sw) { entry: %var1 = alloca %struct + store %struct zeroinitializer, ptr %var1 %var2 = alloca %struct + store %struct zeroinitializer, ptr %var2 %elem1 = getelementptr inbounds [3 x float], ptr %var1, i64 0, i64 0 %elem2 = getelementptr inbounds [3 x float], ptr %var2, i64 0, i64 1 %elem = select i1 %sw, ptr %elem1, ptr %elem2 %res = load float, ptr %elem + store float %res, ptr @G ret void } diff --git a/llvm/test/CodeGen/SPIRV/keep-tracked-const.ll b/llvm/test/CodeGen/SPIRV/keep-tracked-const.ll deleted file mode 100644 index efde6a2c082fc..0000000000000 --- a/llvm/test/CodeGen/SPIRV/keep-tracked-const.ll +++ /dev/null @@ -1,23 +0,0 @@ -; This test case ensures that cleaning of temporary constants doesn't purge tracked ones. - -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} - -; CHECK-SPIRV-DAG: %[[#Int:]] = OpTypeInt 8 0 -; CHECK-SPIRV-DAG: %[[#C0:]] = OpConstantNull %[[#Int]] -; CHECK-SPIRV-DAG: %[[#C1:]] = OpConstant %[[#Int]] 1{{$}} - -define spir_kernel void @foo() { -entry: - %addr = alloca i32 - %r1 = call i8 @_Z20__spirv_SpecConstantia(i32 0, i8 1) - ; The name '%conv17.i' is important for the test case, - ; because it includes i32 0 when encoded for SPIR-V usage. - %conv17.i = sext i8 %r1 to i64 - %tobool = trunc i8 %r1 to i1 - %r2 = zext i1 %tobool to i32 - store i32 %r2, ptr %addr - ret void -} - -declare i8 @_Z20__spirv_SpecConstantia(i32, i8) diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/assume.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/assume.ll index 3d2080e0050b7..691325251f11d 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/assume.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/assume.ll @@ -8,14 +8,15 @@ %class.anon = type { i8 } -define spir_func void @_Z3fooi(i32 %x) { +define spir_func i32 @_Z3fooi(i32 %x) { entry: %x.addr = alloca i32, align 4 store i32 %x, i32* %x.addr, align 4 - %0 = load i32, i32* %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 %cmp = icmp ne i32 %0, 0 call void @llvm.assume(i1 %cmp) - ret void + %retval = select i1 %cmp, i32 100, i32 10 + ret i32 %retval } declare void @llvm.assume(i1) @@ -45,9 +46,9 @@ entry: call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) store i32 1, i32* %a, align 4 %1 = load i32, i32* %a, align 4 - call spir_func void @_Z3fooi(i32 %1) - %2 = bitcast i32* %a to i8* - call void @llvm.lifetime.end.p0i8(i64 4, i8* %2) + %2 = call spir_func i32 @_Z3fooi(i32 %1) + %3 = bitcast i32* %a to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %3) ret void } diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll index 438fff6e94f89..18856147896bb 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll @@ -7,20 +7,20 @@ ; CHECK: OpCapability ArbitraryPrecisionIntegersINTEL ; CHECK: OpExtension "SPV_INTEL_arbitrary_precision_integers" -; CHECK: %[[#I4:]] = OpTypeInt 4 0 -; CHECK: %[[#I2:]] = OpTypeInt 2 0 -; CHECK: %[[#Z4:]] = OpConstantNull %[[#I4]] -; CHECK: %[[#Z2:]] = OpConstantNull %[[#I2]] -; CHECK: %[[#V2I2:]] = OpTypeVector %[[#I2]] 2 -; CHECK: %[[#V2I4:]] = OpTypeVector %[[#I4]] 2 -; CHECK: %[[#V3I2:]] = OpTypeVector %[[#I2]] 3 -; CHECK: %[[#V3I4:]] = OpTypeVector %[[#I4]] 3 -; CHECK: %[[#V4I2:]] = OpTypeVector %[[#I2]] 4 -; CHECK: %[[#V4I4:]] = OpTypeVector %[[#I4]] 4 -; CHECK: %[[#V8I2:]] = OpTypeVector %[[#I2]] 8 -; CHECK: %[[#V8I4:]] = OpTypeVector %[[#I4]] 8 -; CHECK: %[[#V16I2:]] = OpTypeVector %[[#I2]] 16 -; CHECK: %[[#V16I4:]] = OpTypeVector %[[#I4]] 16 +; CHECK-DAG: %[[#I4:]] = OpTypeInt 4 0 +; CHECK-DAG: %[[#I2:]] = OpTypeInt 2 0 +; CHECK-DAG: %[[#Z4:]] = OpConstantNull %[[#I4]] +; CHECK-DAG: %[[#Z2:]] = OpConstantNull %[[#I2]] +; CHECK-DAG: %[[#V2I2:]] = OpTypeVector %[[#I2]] 2 +; CHECK-DAG: %[[#V2I4:]] = OpTypeVector %[[#I4]] 2 +; CHECK-DAG: %[[#V3I2:]] = OpTypeVector %[[#I2]] 3 +; CHECK-DAG: %[[#V3I4:]] = OpTypeVector %[[#I4]] 3 +; CHECK-DAG: %[[#V4I2:]] = OpTypeVector %[[#I2]] 4 +; CHECK-DAG: %[[#V4I4:]] = OpTypeVector %[[#I4]] 4 +; CHECK-DAG: %[[#V8I2:]] = OpTypeVector %[[#I2]] 8 +; CHECK-DAG: %[[#V8I4:]] = OpTypeVector %[[#I4]] 8 +; CHECK-DAG: %[[#V16I2:]] = OpTypeVector %[[#I2]] 16 +; CHECK-DAG: %[[#V16I4:]] = OpTypeVector %[[#I4]] 16 ; CHECK: %[[#]] = OpBitReverse %[[#I2]] %[[#Z2]] @@ -36,45 +36,70 @@ ; CHECK: %[[#]] = OpBitReverse %[[#V16I2]] %[[#]] ; CHECK: %[[#]] = OpBitReverse %[[#V16I4]] %[[#]] +@G_i2_res = global i2 0 +@G_i4_res = global i4 0 +@G_v2i2_res = global <2 x i2> zeroinitializer +@G_v2i4_res = global <2 x i4> zeroinitializer +@G_v3i2_res = global <3 x i2> zeroinitializer +@G_v3i4_res = global <3 x i4> zeroinitializer +@G_v4i2_res = global <4 x i2> zeroinitializer +@G_v4i4_res = global <4 x i4> zeroinitializer +@G_v8i2_res = global <8 x i2> zeroinitializer +@G_v8i4_res = global <8 x i4> zeroinitializer +@G_v16i2_res = global <16 x i2> zeroinitializer +@G_v16i4_res = global <16 x i4> zeroinitializer + define spir_kernel void @testBitRev() { entry: %call2 = call i2 @llvm.bitreverse.i2(i2 0) + store i2 %call2, i2* @G_i2_res %call4 = call i4 @llvm.bitreverse.i4(i4 0) + store i4 %call4, i4* @G_i4_res ret void } define spir_kernel void @testBitRevV2(<2 x i2> %a, <2 x i4> %b) { entry: %call2 = call <2 x i2> @llvm.bitreverse.v2i2(<2 x i2> %a) + store <2 x i2> %call2, <2 x i2>* @G_v2i2_res %call4 = call <2 x i4> @llvm.bitreverse.v2i4(<2 x i4> %b) + store <2 x i4> %call4, <2 x i4>* @G_v2i4_res ret void } define spir_kernel void @testBitRevV3(<3 x i2> %a, <3 x i4> %b) { entry: %call2 = call <3 x i2> @llvm.bitreverse.v3i2(<3 x i2> %a) + store <3 x i2> %call2, <3 x i2>* @G_v3i2_res %call4 = call <3 x i4> @llvm.bitreverse.v3i4(<3 x i4> %b) + store <3 x i4> %call4, <3 x i4>* @G_v3i4_res ret void } define spir_kernel void @testBitRevV4(<4 x i2> %a, <4 x i4> %b) { entry: %call2 = call <4 x i2> @llvm.bitreverse.v4i2(<4 x i2> %a) + store <4 x i2> %call2, <4 x i2>* @G_v4i2_res %call4 = call <4 x i4> @llvm.bitreverse.v4i4(<4 x i4> %b) + store <4 x i4> %call4, <4 x i4>* @G_v4i4_res ret void } define spir_kernel void @testBitRevV8(<8 x i2> %a, <8 x i4> %b) { entry: %call2 = call <8 x i2> @llvm.bitreverse.v8i2(<8 x i2> %a) + store <8 x i2> %call2, <8 x i2>* @G_v8i2_res %call4 = call <8 x i4> @llvm.bitreverse.v8i4(<8 x i4> %b) + store <8 x i4> %call4, <8 x i4>* @G_v8i4_res ret void } define spir_kernel void @testBitRevV16(<16 x i2> %a, <16 x i4> %b) { entry: %call2 = call <16 x i2> @llvm.bitreverse.v16i2(<16 x i2> %a) + store <16 x i2> %call2, <16 x i2>* @G_v16i2_res %call4 = call <16 x i4> @llvm.bitreverse.v16i4(<16 x i4> %b) + store <16 x i4> %call4, <16 x i4>* @G_v16i4_res ret void } diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-arithmetic.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-arithmetic.ll index 11bedfa605f9b..8e8e4df8fabc6 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-arithmetic.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-arithmetic.ll @@ -23,15 +23,28 @@ ; CHECK: OpExtInst %[[#]] %[[#]] fma %[[#]] %[[#]] %[[#]] ; CHECK: OpFRem +@G_r1 = global float 0.0 +@G_r2 = global float 0.0 +@G_r3 = global float 0.0 +@G_r4 = global float 0.0 +@G_r5 = global float 0.0 +@G_r6 = global float 0.0 + ; Function Attrs: norecurse nounwind strictfp define dso_local spir_kernel void @test(float %a, i32 %in, i32 %ui) { entry: %r1 = tail call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") + store float %r1, ptr @G_r1 %r2 = tail call float @llvm.experimental.constrained.fdiv.f32(float %a, float %a, metadata !"round.towardzero", metadata !"fpexcept.strict") + store float %r2, ptr @G_r2 %r3 = tail call float @llvm.experimental.constrained.fsub.f32(float %a, float %a, metadata !"round.upward", metadata !"fpexcept.strict") + store float %r3, ptr @G_r3 %r4 = tail call float @llvm.experimental.constrained.fmul.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.strict") + store float %r4, ptr @G_r4 %r5 = tail call float @llvm.experimental.constrained.fma.f32(float %a, float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + store float %r5, ptr @G_r5 %r6 = tail call float @llvm.experimental.constrained.frem.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + store float %r6, ptr @G_r6 ret void } diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll index f83cd8ad1969c..375da5b32e232 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll @@ -18,19 +18,20 @@ ; CL: %[[#FooVar:]] = OpVariable ; CL-NEXT: %[[#Casted1:]] = OpBitcast %[[#PtrChar]] %[[#FooVar]] ; CL-NEXT: OpLifetimeStart %[[#Casted1]] 16 -; CL-NEXT: OpBitcast -; CL-NEXT: OpInBoundsPtrAccessChain -; CL-NEXT: %[[#Casted2:]] = OpBitcast %[[#PtrChar]] %[[#FooVar]] +; CL: OpInBoundsPtrAccessChain +; CL: %[[#Casted2:]] = OpBitcast %[[#PtrChar]] %[[#FooVar]] ; CL-NEXT: OpLifetimeStop %[[#Casted2]] 16 ; VK: OpFunction ; VK: %[[#FooVar:]] = OpVariable ; VK-NEXT: OpInBoundsAccessChain +; VK-NEXT: OpStore ; VK-NEXT: OpReturn define spir_func void @foo(ptr noundef byval(%tprange) align 8 %_arg_UserRange) { %RoundedRangeKernel = alloca %tprange, align 8 call void @llvm.lifetime.start.p0(ptr nonnull %RoundedRangeKernel) %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 8 + store i64 zeroinitializer, ptr %KernelFunc, align 8 call void @llvm.lifetime.end.p0(ptr nonnull %RoundedRangeKernel) ret void } @@ -39,37 +40,40 @@ define spir_func void @foo(ptr noundef byval(%tprange) align 8 %_arg_UserRange) ; CL: %[[#BarVar:]] = OpVariable ; CL-NEXT: %[[#Casted1:]] = OpBitcast %[[#PtrChar]] %[[#BarVar]] ; CL-NEXT: OpLifetimeStart %[[#Casted1]] 16 -; CL-NEXT: OpBitcast -; CL-NEXT: OpInBoundsPtrAccessChain -; CL-NEXT: %[[#Casted2:]] = OpBitcast %[[#PtrChar]] %[[#BarVar]] +; CL: OpInBoundsPtrAccessChain +; CL: %[[#Casted2:]] = OpBitcast %[[#PtrChar]] %[[#BarVar]] ; CL-NEXT: OpLifetimeStop %[[#Casted2]] 16 ; VK: OpFunction ; VK: %[[#BarVar:]] = OpVariable ; VK-NEXT: OpInBoundsAccessChain +; VK-NEXT: OpStore ; VK-NEXT: OpReturn define spir_func void @bar(ptr noundef byval(%tprange) align 8 %_arg_UserRange) { %RoundedRangeKernel = alloca %tprange, align 8 call void @llvm.lifetime.start.p0(ptr nonnull %RoundedRangeKernel) %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 8 + store i64 zeroinitializer, ptr %KernelFunc, align 8 call void @llvm.lifetime.end.p0(ptr nonnull %RoundedRangeKernel) ret void } ; CL: OpFunction ; CL: %[[#TestVar:]] = OpVariable -; CL-NEXT: OpLifetimeStart %[[#TestVar]] 1 -; CL-NEXT: OpInBoundsPtrAccessChain -; CL-NEXT: OpLifetimeStop %[[#TestVar]] 1 +; CL: OpLifetimeStart %[[#TestVar]] 1 +; CL: OpInBoundsPtrAccessChain +; CL: OpLifetimeStop %[[#TestVar]] 1 ; VK: OpFunction ; VK: %[[#Test:]] = OpVariable ; VK-NEXT: OpInBoundsAccessChain +; VK-NEXT: OpStore ; VK-NEXT: OpReturn define spir_func void @test(ptr noundef align 8 %_arg) { %var = alloca i8, align 8 call void @llvm.lifetime.start.p0(ptr nonnull %var) %KernelFunc = getelementptr inbounds i8, ptr %var, i64 1 + store i8 0, ptr %KernelFunc, align 8 call void @llvm.lifetime.end.p0(ptr nonnull %var) ret void } diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/logical-memcpy.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/logical-memcpy.ll new file mode 100644 index 0000000000000..63eddd20bfc22 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/logical-memcpy.ll @@ -0,0 +1,32 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpName %[[dst_var:[0-9]+]] "dst" +; CHECK: OpName %[[src_var:[0-9]+]] "src" + +; CHECK: %[[f32:[0-9]+]] = OpTypeFloat 32 +; CHECK: %[[structS:[0-9]+]] = OpTypeStruct %[[f32]] %[[f32]] %[[f32]] %[[f32]] %[[f32]] +; CHECK: %[[ptr_crosswkgrp_structS:[0-9]+]] = OpTypePointer CrossWorkgroup %[[structS]] +%struct.S = type <{ float, float, float, float, float }> + +; CHECK-DAG: %[[src_var]] = OpVariable %[[ptr_crosswkgrp_structS]] CrossWorkgroup +@src = external dso_local addrspace(1) global %struct.S, align 4 + +; CHECK-DAG: %[[dst_var]] = OpVariable %[[ptr_crosswkgrp_structS]] CrossWorkgroup +@dst = external dso_local addrspace(1) global %struct.S, align 4 + +; CHECK: %[[main_func:[0-9]+]] = OpFunction %{{[0-9]+}} None %{{[0-9]+}} +; CHECK: %[[entry:[0-9]+]] = OpLabel +; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none, target_mem0: none, target_mem1: none) +define void @main() local_unnamed_addr #0 { +entry: +; CHECK: OpCopyMemory %[[dst_var]] %[[src_var]] Aligned 4 + call void @llvm.memcpy.p0.p0.i64(ptr addrspace(1) align 4 @dst, ptr addrspace(1) align 4 @src, i64 20, i1 false) + ret void +; CHECK: OpReturn +; CHECK: OpFunctionEnd +} + +attributes #0 = { "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" } + + diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/satur-arith.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/satur-arith.ll index 08f15c077fed9..db930d1b28ec3 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/satur-arith.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/satur-arith.ll @@ -9,29 +9,55 @@ ; CHECK-DAG: OpName %[[#Bar:]] "bar" ; CHECK: %[[#Foo]] = OpFunction ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] u_add_sat -; CHECK-NEXT: %[[#]] = OpExtInst %[[#]] %[[#]] u_sub_sat -; CHECK-NEXT: %[[#]] = OpExtInst %[[#]] %[[#]] s_add_sat -; CHECK-NEXT: %[[#]] = OpExtInst %[[#]] %[[#]] s_sub_sat +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] u_sub_sat +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] s_add_sat +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] s_sub_sat ; CHECK: %[[#Bar]] = OpFunction ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] u_add_sat -; CHECK-NEXT: %[[#]] = OpExtInst %[[#]] %[[#]] u_sub_sat -; CHECK-NEXT: %[[#]] = OpExtInst %[[#]] %[[#]] s_add_sat -; CHECK-NEXT: %[[#]] = OpExtInst %[[#]] %[[#]] s_sub_sat +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] u_sub_sat +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] s_add_sat +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] s_sub_sat + +@G_r1_foo = global i16 0 +@G_r2_foo = global i16 0 +@G_r3_foo = global i16 0 +@G_r4_foo = global i16 0 +@G_r1_bar = global <4 x i32> zeroinitializer +@G_r2_bar = global <4 x i32> zeroinitializer +@G_r3_bar = global <4 x i32> zeroinitializer +@G_r4_bar = global <4 x i32> zeroinitializer define spir_func void @foo(i16 %x, i16 %y) { entry: %r1 = tail call i16 @llvm.uadd.sat.i16(i16 %x, i16 %y) + store i16 %r1, ptr @G_r1_foo %r2 = tail call i16 @llvm.usub.sat.i16(i16 %x, i16 %y) + store i16 %r2, ptr @G_r2_foo %r3 = tail call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y) + store i16 %r3, ptr @G_r3_foo %r4 = tail call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y) + store i16 %r4, ptr @G_r4_foo ret void } define spir_func void @bar(<4 x i32> %x, <4 x i32> %y) { entry: %r1 = tail call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) + store <4 x i32> %r1, ptr @G_r1_bar %r2 = tail call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) + store <4 x i32> %r2, ptr @G_r2_bar %r3 = tail call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) + store <4 x i32> %r3, ptr @G_r3_bar %r4 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) + store <4 x i32> %r4, ptr @G_r4_bar ret void } + +declare i16 @llvm.uadd.sat.i16(i16, i16) +declare i16 @llvm.usub.sat.i16(i16, i16) +declare i16 @llvm.sadd.sat.i16(i16, i16) +declare i16 @llvm.ssub.sat.i16(i16, i16) +declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll index 08e429f36827c..54cb096da8d89 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll @@ -90,12 +90,13 @@ define dso_local spir_func void @umulo_v2i64(<2 x i64> %a, <2 x i64> %b, ptr %p) ; CHECK: OpIAddCarry %[[StructLong]] ; CHECK: OpIAddCarry %[[StructLong]] ; CHECK: OpReturn -define void @foo(i64 %a, i64 %b) { +define i64 @foo(i64 %a, i64 %b) { %r1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) %r2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) %d1 = extractvalue { i64, i1 } %r1, 0 %d2 = extractvalue { i64, i1 } %r2, 0 - ret void + %sum = add i64 %d1, %d2 + ret i64 %sum } declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8) diff --git a/llvm/test/CodeGen/SPIRV/logical-access-chain.ll b/llvm/test/CodeGen/SPIRV/logical-access-chain.ll index d56678ecfc2c9..e96ebf777c28f 100644 --- a/llvm/test/CodeGen/SPIRV/logical-access-chain.ll +++ b/llvm/test/CodeGen/SPIRV/logical-access-chain.ll @@ -2,6 +2,7 @@ ; CHECK-DAG: [[uint:%[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: [[uint2:%[0-9]+]] = OpTypeVector [[uint]] 2 +; CHECK-DAG: [[uint_0:%[0-9]+]] = OpConstant [[uint]] 0 ; CHECK-DAG: [[uint_1:%[0-9]+]] = OpConstant [[uint]] 1 ; CHECK-DAG: [[ptr_uint:%[0-9]+]] = OpTypePointer Function [[uint]] ; CHECK-DAG: [[ptr_uint2:%[0-9]+]] = OpTypePointer Function [[uint2]] @@ -12,7 +13,9 @@ entry: ; CHECK: [[var:%[0-9]+]] = OpVariable [[ptr_uint2]] Function %1 = getelementptr <2 x i32>, ptr %0, i32 0, i32 1 -; CHECK: {{%[0-9]+}} = OpAccessChain [[ptr_uint]] [[var]] [[uint_1]] +; CHECK: [[gep:%[0-9]+]] = OpAccessChain [[ptr_uint]] [[var]] [[uint_1]] + store i32 0, ptr %1 +; CHECK: OpStore [[gep]] [[uint_0]] ret void } diff --git a/llvm/test/CodeGen/SPIRV/logical-struct-access.ll b/llvm/test/CodeGen/SPIRV/logical-struct-access.ll index 66337b1ba2b37..518e011bf0be2 100644 --- a/llvm/test/CodeGen/SPIRV/logical-struct-access.ll +++ b/llvm/test/CodeGen/SPIRV/logical-struct-access.ll @@ -1,5 +1,4 @@ -; RUN: llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - -print-after-all | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} +; RUN: llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - | FileCheck %s ; CHECK-DAG: [[uint:%[0-9]+]] = OpTypeInt 32 0 @@ -24,35 +23,85 @@ ; CHECK-DAG: [[ptr_A:%[0-9]+]] = OpTypePointer Function [[A]] ; CHECK-DAG: [[ptr_B:%[0-9]+]] = OpTypePointer Function [[B]] -define void @main() #1 { -entry: - %0 = alloca %B, align 4 -; CHECK: [[tmp:%[0-9]+]] = OpVariable [[ptr_B]] Function - - %1 = getelementptr %B, ptr %0, i32 0, i32 0 +define internal ptr @gep_B_0(ptr %base) { +; CHECK: [[tmp:%[0-9]+]] = OpFunctionParameter [[ptr_B]] ; CHECK: {{%[0-9]+}} = OpAccessChain [[ptr_A]] [[tmp]] [[uint_0]] - %2 = getelementptr inbounds %B, ptr %0, i32 0, i32 0 + %res = getelementptr %B, ptr %base, i32 0, i32 0 + ret ptr %res +} + +define internal ptr @gep_inbounds_B_0(ptr %base) { +; CHECK: [[tmp:%[0-9]+]] = OpFunctionParameter [[ptr_B]] ; CHECK: {{%[0-9]+}} = OpInBoundsAccessChain [[ptr_A]] [[tmp]] [[uint_0]] + %res = getelementptr inbounds %B, ptr %base, i32 0, i32 0 + ret ptr %res +} - %3 = getelementptr %B, ptr %0, i32 0, i32 1 +define internal ptr @gep_B_1(ptr %base) { +; CHECK: [[tmp:%[0-9]+]] = OpFunctionParameter [[ptr_B]] ; CHECK: {{%[0-9]+}} = OpAccessChain [[ptr_uint]] [[tmp]] [[uint_1]] - %4 = getelementptr inbounds %B, ptr %0, i32 0, i32 1 + %res = getelementptr %B, ptr %base, i32 0, i32 1 + ret ptr %res +} + +define internal ptr @gep_inbounds_B_1(ptr %base) { +; CHECK: [[tmp:%[0-9]+]] = OpFunctionParameter [[ptr_B]] ; CHECK: {{%[0-9]+}} = OpInBoundsAccessChain [[ptr_uint]] [[tmp]] [[uint_1]] + %res = getelementptr inbounds %B, ptr %base, i32 0, i32 1 + ret ptr %res +} - %5 = getelementptr %B, ptr %0, i32 0, i32 2 +define internal ptr @gep_B_2(ptr %base) { +; CHECK: [[tmp:%[0-9]+]] = OpFunctionParameter [[ptr_B]] ; CHECK: {{%[0-9]+}} = OpAccessChain [[ptr_A]] [[tmp]] [[uint_2]] - %6 = getelementptr inbounds %B, ptr %0, i32 0, i32 2 + %res = getelementptr %B, ptr %base, i32 0, i32 2 + ret ptr %res +} + +define internal ptr @gep_inbounds_B_2(ptr %base) { +; CHECK: [[tmp:%[0-9]+]] = OpFunctionParameter [[ptr_B]] ; CHECK: {{%[0-9]+}} = OpInBoundsAccessChain [[ptr_A]] [[tmp]] [[uint_2]] + %res = getelementptr inbounds %B, ptr %base, i32 0, i32 2 + ret ptr %res +} - %7 = getelementptr %B, ptr %0, i32 0, i32 2, i32 1 +define internal ptr @gep_B_2_1(ptr %base) { +; CHECK: [[tmp:%[0-9]+]] = OpFunctionParameter [[ptr_B]] ; CHECK: {{%[0-9]+}} = OpAccessChain [[ptr_uint]] [[tmp]] [[uint_2]] [[uint_1]] - %8 = getelementptr inbounds %B, ptr %0, i32 0, i32 2, i32 1 + %res = getelementptr %B, ptr %base, i32 0, i32 2, i32 1 + ret ptr %res +} + +define internal ptr @gep_inbounds_B_2_1(ptr %base) { +; CHECK: [[tmp:%[0-9]+]] = OpFunctionParameter [[ptr_B]] ; CHECK: {{%[0-9]+}} = OpInBoundsAccessChain [[ptr_uint]] [[tmp]] [[uint_2]] [[uint_1]] + %res = getelementptr inbounds %B, ptr %base, i32 0, i32 2, i32 1 + ret ptr %res +} - %9 = getelementptr %B, ptr %0, i32 0, i32 2 - %10 = getelementptr %A, ptr %9, i32 0, i32 1 +define internal ptr @gep_B_2_A_1(ptr %base) { +; CHECK: [[tmp:%[0-9]+]] = OpFunctionParameter [[ptr_B]] ; CHECK: [[x:%[0-9]+]] = OpAccessChain [[ptr_A]] [[tmp]] [[uint_2]] ; CHECK: {{%[0-9]+}} = OpAccessChain [[ptr_uint]] [[x]] [[uint_1]] + %x = getelementptr %B, ptr %base, i32 0, i32 2 + %res = getelementptr %A, ptr %x, i32 0, i32 1 + ret ptr %res +} + +define void @main() #1 { +entry: + %0 = alloca %B, align 4 +; CHECK: [[tmp:%[0-9]+]] = OpVariable [[ptr_B]] Function + + %1 = call ptr @gep_B_0(ptr %0) + %2 = call ptr @gep_inbounds_B_0(ptr %0) + %3 = call ptr @gep_B_1(ptr %0) + %4 = call ptr @gep_inbounds_B_1(ptr %0) + %5 = call ptr @gep_B_2(ptr %0) + %6 = call ptr @gep_inbounds_B_2(ptr %0) + %7 = call ptr @gep_B_2_1(ptr %0) + %8 = call ptr @gep_inbounds_B_2_1(ptr %0) + %10 = call ptr @gep_B_2_A_1(ptr %0) ret void } diff --git a/llvm/test/CodeGen/SPIRV/phi-insert-point.ll b/llvm/test/CodeGen/SPIRV/phi-insert-point.ll index 70d121cdf4b3a..a34186d491257 100644 --- a/llvm/test/CodeGen/SPIRV/phi-insert-point.ll +++ b/llvm/test/CodeGen/SPIRV/phi-insert-point.ll @@ -36,9 +36,18 @@ ok: br label %exit exit: + store i64 %r1, ptr @g1 + store i64 %r2, ptr @g2 + store ptr addrspace(4) %r3, ptr @g3 + store ptr addrspace(4) %r4, ptr @g4 ret void } +@g1 = internal global i64 0 +@g2 = internal global i64 0 +@g3 = internal global ptr addrspace(4) null +@g4 = internal global ptr addrspace(4) null + define spir_kernel void @bar(i64 %arg_val, i64 %arg_val_def, ptr addrspace(4) byval(%struct) %arg_ptr, ptr addrspace(4) %arg_ptr_def) { entry: %fl = icmp eq i64 %arg_val, 0 @@ -55,5 +64,9 @@ ok: br label %exit exit: + store i64 %r1, ptr @g1 + store i64 %r2, ptr @g2 + store ptr addrspace(4) %r3, ptr @g3 + store ptr addrspace(4) %r4, ptr @g4 ret void } diff --git a/llvm/test/CodeGen/SPIRV/phi-ptrcast-dominate.ll b/llvm/test/CodeGen/SPIRV/phi-ptrcast-dominate.ll index bc090ce55fbec..c250ebae12746 100644 --- a/llvm/test/CodeGen/SPIRV/phi-ptrcast-dominate.ll +++ b/llvm/test/CodeGen/SPIRV/phi-ptrcast-dominate.ll @@ -20,11 +20,14 @@ ; CHECK: %[[#Case1]] = OpFunction define spir_func void @case1(i1 %b1, i1 %b2, i1 %b3) { entry: + %tmp.1 = alloca i8, align 1 ; CHECK: OpBranchConditional %[[#]] %[[#l1:]] %[[#l2:]] br i1 %b1, label %l1, label %l2 l1: %str = phi ptr addrspace(1) [ @.str.1, %entry ], [ @.str.2, %l2 ], [ @.str.2, %l3 ] + %v1 = load i8, ptr addrspace(1) %str, align 1 + store i8 %v1, ptr %tmp.1, align 1 br label %exit ; CHECK: %[[#l2]] = OpLabel @@ -51,11 +54,14 @@ exit: ; CHECK: %[[#Case2]] = OpFunction define spir_func void @case2(i1 %b1, i1 %b2, i1 %b3, ptr addrspace(1) byval(%struct1) %str1, ptr addrspace(1) byval(%struct2) %str2) { entry: + %tmp.2 = alloca i8, align 1 ; CHECK: OpBranchConditional %[[#]] %[[#l1:]] %[[#l2:]] br i1 %b1, label %l1, label %l2 l1: %str = phi ptr addrspace(1) [ %str1, %entry ], [ %str2, %l2 ], [ %str2, %l3 ] + %v2 = load i8, ptr addrspace(1) %str, align 1 + store i8 %v2, ptr %tmp.2, align 1 br label %exit ; CHECK: %[[#l2]] = OpLabel @@ -83,10 +89,13 @@ define spir_func void @case3(i1 %b1, i1 %b2, i1 %b3, ptr addrspace(1) byval(%str ; CHECK: OpBranchConditional %[[#]] %[[#l1:]] %[[#l2:]] entry: + %tmp.3 = alloca i8, align 1 br i1 %b1, label %l1, label %l2 l1: %str = phi ptr addrspace(1) [ %_arg_str1, %entry ], [ %str2, %l2 ], [ %str3, %l3 ] + %v3 = load i8, ptr addrspace(1) %str, align 1 + store i8 %v3, ptr %tmp.3, align 1 br label %exit ; CHECK: %[[#l2]] = OpLabel diff --git a/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-accesschain.ll b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-accesschain.ll index 7db1eed84bf7d..3382987bbd581 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-accesschain.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-accesschain.ll @@ -26,9 +26,13 @@ %struct.S = type { i32 } %struct.__wrapper_class = type { [7 x %struct.S] } +@G_elem = global ptr null +@G_data = global i64 0 + define spir_kernel void @foo1(ptr noundef byval(%struct.__wrapper_class) align 4 %_arg_Arr) { entry: %elem = getelementptr inbounds i8, ptr %_arg_Arr, i64 0 + store ptr %elem, ptr @G_elem ret void } @@ -36,5 +40,6 @@ define spir_kernel void @foo2(ptr noundef byval(%struct.__wrapper_class) align 4 entry: %elem = getelementptr inbounds %struct.__wrapper_class, ptr %_arg_Arr, i64 0 %data = load i64, ptr %elem + store i64 %data, ptr @G_data ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-load.ll b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-load.ll index d6a0071167cef..ed5652a750582 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-load.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-load.ll @@ -14,8 +14,11 @@ %struct.S = type { i32 } %struct.__wrapper_class = type { [7 x %struct.S] } +@G = global i32 0 + define spir_kernel void @foo(ptr noundef byval(%struct.__wrapper_class) align 4 %_arg_Arr) { entry: %val = load i32, ptr %_arg_Arr + store i32 %val, ptr @G ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll b/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll index 0e2730e18bf38..e47aa61a8acd7 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll @@ -30,6 +30,8 @@ %"class.std::complex" = type { { double, double } } %class.anon = type { i32, ptr addrspace(4), [2 x [2 x %"class.std::complex"]] } +@G = global ptr addrspace(4) null + define weak_odr dso_local spir_kernel void @foo(i32 noundef %_arg_N, ptr addrspace(1) noundef align 8 %_arg_p) { entry: %Kernel = alloca %class.anon, align 8 @@ -38,5 +40,6 @@ entry: %r0 = addrspacecast ptr addrspace(1) %_arg_p to ptr addrspace(4) store ptr addrspace(4) %r0, ptr %p, align 8 %r3 = load ptr addrspace(4), ptr %p, align 8 + store ptr addrspace(4) %r3, ptr @G ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-addressspace.ll b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-addressspace.ll index 7a09ac973b590..0e397ec51caaa 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-addressspace.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-addressspace.ll @@ -7,9 +7,14 @@ ; CHECK: %[[#]] = OpInBoundsPtrAccessChain %[[#PTR1]] %[[#]] %[[#]] ; CHECK: %[[#]] = OpInBoundsPtrAccessChain %[[#PTR2]] %[[#]] %[[#]] +@G_c = global ptr addrspace(1) null +@G_d = global ptr addrspace(2) null + define spir_kernel void @foo(ptr addrspace(1) %a, ptr addrspace(2) %b) { entry: %c = getelementptr inbounds i8, ptr addrspace(1) %a, i32 1 + store ptr addrspace(1) %c, ptr @G_c %d = getelementptr inbounds i8, ptr addrspace(2) %b, i32 2 + store ptr addrspace(2) %d, ptr @G_d ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-base-type.ll b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-base-type.ll index c822dbc5d6c0e..e12a809125248 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-base-type.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-base-type.ll @@ -7,9 +7,12 @@ ; CHECK: %[[#GEP:]] = OpInBoundsPtrAccessChain %[[#PTR]] %[[#ARG]] %[[#]] ; CHECK: %[[#]] = OpLoad %[[#FLOAT32]] %[[#GEP]] Aligned 4 +@G = global float 0.0 + define spir_kernel void @test1(ptr addrspace(1) %arg1) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_type_qual !4 { %a = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 1 %b = load float, ptr addrspace(1) %a, align 4 + store float %b, ptr @G ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-bitcast-load.ll b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-bitcast-load.ll index 1d846a35a65aa..859253e5b18d9 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-bitcast-load.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-bitcast-load.ll @@ -7,6 +7,9 @@ ; CHECK-DAG: %[[#PTR_VEC3:]] = OpTypePointer CrossWorkgroup %[[#VEC3]] ; CHECK-DAG: %[[#PTR_VEC4:]] = OpTypePointer CrossWorkgroup %[[#VEC4]] +@G_loadv1 = global <4 x i8> zeroinitializer +@G_loadv2 = global <4 x i8> zeroinitializer + ; CHECK: %[[#AC1:]] = OpInBoundsPtrAccessChain %[[#PTR_VEC3]] %[[#]] %[[#]] ; CHECK: %[[#BC1:]] = OpBitcast %[[#PTR_VEC4]] %[[#AC1]] ; CHECK: %[[#LD1:]] = OpLoad %[[#VEC4]] %[[#BC1]] Aligned 4 @@ -15,6 +18,7 @@ define spir_kernel void @foo(ptr addrspace(1) %a, i64 %b) { %index = getelementptr inbounds <3 x i8>, ptr addrspace(1) %a, i64 %b %loadv = load <4 x i8>, ptr addrspace(1) %index, align 4 + store <4 x i8> %loadv, ptr @G_loadv1 ret void } @@ -29,5 +33,6 @@ define spir_kernel void @bar(ptr addrspace(1) %a, i64 %b) { ; from older LLVM IR with typed pointers. %cast = bitcast ptr addrspace(1) %index to ptr addrspace(1) %loadv = load <4 x i8>, ptr addrspace(1) %cast, align 4 + store <4 x i8> %loadv, ptr @G_loadv2 ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-kernel-arg-char.ll b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-kernel-arg-char.ll index a5e891dae6f11..3ae03edf5200f 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-kernel-arg-char.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-kernel-arg-char.ll @@ -7,11 +7,15 @@ ; CHECK-DAG: %[[#PTRINT8:]] = OpTypePointer Workgroup %[[#INT8]] ; CHECK-DAG: %[[#CONST:]] = OpConstant %[[#INT64]] 1 +@G_gep1 = global ptr addrspace(3) null +@G_gep2 = global ptr addrspace(3) null + ; CHECK: %[[#PARAM1:]] = OpFunctionParameter %[[#PTRINT8]] define spir_kernel void @test1(ptr addrspace(3) %address) { ; CHECK: %[[#]] = OpInBoundsPtrAccessChain %[[#PTRINT8]] %[[#PARAM1]] %[[#CONST]] %cast = bitcast ptr addrspace(3) %address to ptr addrspace(3) %gep = getelementptr inbounds i8, ptr addrspace(3) %cast, i64 1 + store ptr addrspace(3) %gep, ptr @G_gep1 ret void } @@ -19,5 +23,6 @@ define spir_kernel void @test1(ptr addrspace(3) %address) { define spir_kernel void @test2(ptr addrspace(3) %address) { ; CHECK: %[[#]] = OpInBoundsPtrAccessChain %[[#PTRINT8]] %[[#PARAM2]] %[[#CONST]] %gep = getelementptr inbounds i8, ptr addrspace(3) %address, i64 1 + store ptr addrspace(3) %gep, ptr @G_gep2 ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/global-addrspacecast.ll b/llvm/test/CodeGen/SPIRV/pointers/global-addrspacecast.ll index 19451d23c6830..39563aecafec4 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/global-addrspacecast.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/global-addrspacecast.ll @@ -7,13 +7,16 @@ ; CHECK-DAG: %[[#value:]] = OpConstant %[[#type]] 456 ; CHECK-DAG: %[[#var:]] = OpVariable %[[#ptrty]] Private %[[#value]] +@G = internal global i32 0 + define hidden spir_func void @Foo() { %p = addrspacecast ptr addrspace(10) @PrivInternal to ptr %v = load i32, ptr %p, align 4 + store i32 %v, ptr @G ret void ; CHECK: OpLabel -; CHECK-NEXT: OpLoad %[[#type]] %[[#var]] Aligned 4 -; CHECK-Next: OpReturn +; CHECK: OpLoad %[[#type]] %[[#var]] Aligned 4 +; CHECK: OpReturn } define void @main() #1 { diff --git a/llvm/test/CodeGen/SPIRV/pointers/load-addressspace.ll b/llvm/test/CodeGen/SPIRV/pointers/load-addressspace.ll index b3c68d22f9bdd..681fb70ad706d 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/load-addressspace.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/load-addressspace.ll @@ -9,9 +9,14 @@ ; CHECK: %[[#]] = OpLoad %[[#INT8]] %[[#FNP1]] Aligned 1 ; CHECK: %[[#]] = OpLoad %[[#INT8]] %[[#FNP2]] Aligned 1 +@G_c = global i8 0 +@G_d = global i8 0 + define spir_kernel void @foo(ptr addrspace(1) %a, ptr addrspace(2) %b) { entry: %c = load i8, ptr addrspace(1) %a + store i8 %c, ptr @G_c %d = load i8, ptr addrspace(2) %b + store i8 %d, ptr @G_d ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll index a9e79df259c4f..44134f83cfec3 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll @@ -51,6 +51,7 @@ l1: l2: %val2 = phi ptr addrspace(4) [ %p, %l1 ], [ %val3, %l3 ] %val1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @G1 to ptr addrspace(4)), %l1 ], [ %val2, %l3 ] + store i16 0, ptr addrspace(4) %val1, align 2 br i1 %f2, label %l3, label %exit l3: @@ -75,6 +76,7 @@ l1: l2: %val1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @G1 to ptr addrspace(4)), %l1 ], [ %val2, %l3 ] %val2 = phi ptr addrspace(4) [ %p, %l1 ], [ %val3, %l3 ] + store i16 0, ptr addrspace(4) %val1, align 2 br i1 %f2, label %l3, label %exit exit: diff --git a/llvm/test/CodeGen/SPIRV/pointers/pointer-addrspacecast.ll b/llvm/test/CodeGen/SPIRV/pointers/pointer-addrspacecast.ll index 4d5549dfab8d9..123daa411810b 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/pointer-addrspacecast.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/pointer-addrspacecast.ll @@ -10,6 +10,7 @@ ; CHECK-DAG: OpName %[[#func_chain:]] "chain" @global = internal addrspace(10) global i32 zeroinitializer +@G = global i32 0 define void @simple() { ; CHECK: %[[#func_simple]] = OpFunction @@ -17,6 +18,7 @@ entry: %ptr = getelementptr i32, ptr addrspace(10) @global, i32 0 %casted = addrspacecast ptr addrspace(10) %ptr to ptr %val = load i32, ptr %casted + store i32 %val, ptr @G ; CHECK: %{{.*}} = OpLoad %[[#uint]] %[[#var]] Aligned 4 ret void } @@ -31,6 +33,7 @@ entry: %e = addrspacecast ptr addrspace(10) %d to ptr %val = load i32, ptr %e + store i32 %val, ptr @G ; CHECK: %{{.*}} = OpLoad %[[#uint]] %[[#var]] Aligned 4 ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/ptr-eq-types.ll b/llvm/test/CodeGen/SPIRV/pointers/ptr-eq-types.ll index 876cd3c20cf35..80ee36cfe15d2 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/ptr-eq-types.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/ptr-eq-types.ll @@ -15,6 +15,9 @@ ; CHECK: OpGenericCastToPtr ; CHECK: OpPtrEqual +@G_b1 = global i1 0 +@G_b2 = global i1 0 + define spir_kernel void @foo(ptr addrspace(3) align 4 %_arg_local, ptr addrspace(1) align 4 %_arg_global) { entry: %p1 = getelementptr inbounds i32, ptr addrspace(1) %_arg_global, i64 0 @@ -24,9 +27,12 @@ entry: %p4 = addrspacecast ptr addrspace(1) %p3 to ptr addrspace(4) %p5 = tail call spir_func ptr addrspace(3) @_Z40__spirv_GenericCastToPtrExplicit_ToLocalPvi(ptr addrspace(4) %p4, i32 4) %b1 = icmp eq ptr addrspace(3) %p5, null + store i1 %b1, ptr @G_b1 %p6 = getelementptr inbounds i32, ptr addrspace(3) %p5, i64 0 %p7 = tail call spir_func ptr addrspace(3) @_Z40__spirv_GenericCastToPtrExplicit_ToLocalPvi(ptr addrspace(4) %p4, i32 4) %b2 = icmp eq ptr addrspace(3) %p7, null + store i1 %b2, ptr @G_b2 + store ptr addrspace(3) %p6, ptr addrspace(3) %p2 ret void } diff --git a/llvm/test/CodeGen/SPIRV/pointers/resource-vector-load-store.ll b/llvm/test/CodeGen/SPIRV/pointers/resource-vector-load-store.ll index 7548f4757dbe6..6fc03a386d14d 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/resource-vector-load-store.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/resource-vector-load-store.ll @@ -4,18 +4,23 @@ @.str = private unnamed_addr constant [7 x i8] c"buffer\00", align 1 +; The i64 values in the extracts will be turned +; into immidiate values. There should be no 64-bit +; integers in the module. +; CHECK-NOT: OpTypeInt 64 0 + define void @main() "hlsl.shader"="pixel" { -; CHECK: %24 = OpFunction %2 None %3 ; -- Begin function main -; CHECK-NEXT: %1 = OpLabel -; CHECK-NEXT: %25 = OpVariable %13 Function %22 -; CHECK-NEXT: %26 = OpLoad %7 %23 -; CHECK-NEXT: %27 = OpImageRead %5 %26 %15 -; CHECK-NEXT: %28 = OpCompositeExtract %4 %27 0 -; CHECK-NEXT: %29 = OpCompositeExtract %4 %27 1 -; CHECK-NEXT: %30 = OpFAdd %4 %29 %28 -; CHECK-NEXT: %31 = OpCompositeInsert %5 %30 %27 0 -; CHECK-NEXT: %32 = OpLoad %7 %23 -; CHECK-NEXT: OpImageWrite %32 %15 %31 +; CHECK: %[[FUNC:[0-9]+]] = OpFunction %[[VOID:[0-9]+]] None %[[FNTYPE:[0-9]+]] ; -- Begin function main +; CHECK-NEXT: %[[LABEL:[0-9]+]] = OpLabel +; CHECK-NEXT: %[[VAR:[0-9]+]] = OpVariable %[[PTR_FN:[a-zA-Z0-9_]+]] Function %[[INIT:[a-zA-Z0-9_]+]] +; CHECK-NEXT: %[[LOAD1:[0-9]+]] = OpLoad %[[IMG_TYPE:[a-zA-Z0-9_]+]] %[[IMG_VAR:[a-zA-Z0-9_]+]] +; CHECK-NEXT: %[[READ:[0-9]+]] = OpImageRead %[[VEC4:[a-zA-Z0-9_]+]] %[[LOAD1]] %[[COORD:[a-zA-Z0-9_]+]] +; CHECK-NEXT: %[[EXTRACT1:[0-9]+]] = OpCompositeExtract %[[FLOAT:[a-zA-Z0-9_]+]] %[[READ]] 0 +; CHECK-NEXT: %[[EXTRACT2:[0-9]+]] = OpCompositeExtract %[[FLOAT]] %[[READ]] 1 +; CHECK-NEXT: %[[ADD:[0-9]+]] = OpFAdd %[[FLOAT]] %[[EXTRACT2]] %[[EXTRACT1]] +; CHECK-NEXT: %[[INSERT:[0-9]+]] = OpCompositeInsert %[[VEC4]] %[[ADD]] %[[READ]] 0 +; CHECK-NEXT: %[[LOAD2:[0-9]+]] = OpLoad %[[IMG_TYPE]] %[[IMG_VAR]] +; CHECK-NEXT: OpImageWrite %[[LOAD2]] %[[COORD]] %[[INSERT]] ; CHECK-NEXT: OpReturn ; CHECK-NEXT: OpFunctionEnd entry: diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll index 101116f437811..7409b3db51948 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll @@ -34,6 +34,8 @@ %class.CustomType = type { i64 } +@G = global ptr addrspace(4) null + define linkonce_odr dso_local spir_func void @bar(ptr addrspace(4) noundef %first) { entry: %first.addr = alloca ptr addrspace(4) @@ -44,6 +46,7 @@ entry: call spir_func void @foo(i64 noundef 100, ptr addrspace(4) noundef dereferenceable(8) %first.addr.ascast, ptr addrspace(4) noundef dereferenceable(8) %temp.ascast) call spir_func void @foo(i64 noundef 100, ptr addrspace(4) noundef dereferenceable(8) %temp.ascast, ptr addrspace(4) noundef dereferenceable(8) %first.addr.ascast) %var = alloca ptr addrspace(4), align 8 + store ptr addrspace(4) null, ptr %var ret void } diff --git a/llvm/test/CodeGen/SPIRV/remove-dead-type-intrinsics.ll b/llvm/test/CodeGen/SPIRV/remove-dead-type-intrinsics.ll new file mode 100644 index 0000000000000..6bd640f813142 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/remove-dead-type-intrinsics.ll @@ -0,0 +1,31 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +%A = type { + i32, + i32 +} + +%B = type { + %A, + i32, + %A +} + +; Make sure all struct types are removed. +; CHECK-NOT: OpTypeStruct + +; Make sure the GEPs and the function scope variable are removed. +; CHECK: OpFunction +; CHECK-NEXT: OpLabel +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +define void @main() #1 { +entry: + %0 = alloca %B, align 4 + %1 = getelementptr %B, ptr %0, i32 0, i32 2 + %2 = getelementptr %A, ptr %1, i32 0, i32 1 + ret void +} + +attributes #1 = { "hlsl.numthreads"="4,8,16" "hlsl.shader"="compute" } diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll index 481bad9a26b7b..280f586891717 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll @@ -19,10 +19,15 @@ ; TODO: Add a check to ensure that there's no behavior change of bitreverse operation ; between the LLVM-IR and SPIR-V for i2 and i4 +@G_res2 = global i2 0 +@G_res4 = global i4 0 + define spir_func void @foo(i2 %a, i4 %b) { entry: %res2 = tail call i2 @llvm.bitreverse.i2(i2 %a) + store i2 %res2, ptr @G_res2 %res4 = tail call i4 @llvm.bitreverse.i4(i4 %b) + store i4 %res4, ptr @G_res4 ret void } diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll index 119dbe14446c1..68f33510b6a8d 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll @@ -45,6 +45,12 @@ entry: %GE = call spir_func ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) %var1, i32 5) %LE = call spir_func ptr addrspace(3) @_Z40__spirv_GenericCastToPtrExplicit_ToLocalPvi(ptr addrspace(4) %var2, i32 4) %PE = call spir_func ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) %var3, i32 7) + store i32 0, ptr addrspace(1) %G, align 4 + store i8 0, ptr addrspace(3) %L, align 1 + store i32 0, ptr %P, align 4 + store i32 0, ptr addrspace(1) %GE, align 4 + store i8 0, ptr addrspace(3) %LE, align 1 + store i32 0, ptr %PE, align 4 ret void } @@ -70,6 +76,9 @@ entry: %G = call spir_func ptr addrspace(1) @_Z9to_globalPv(ptr addrspace(4) %var1) %L = call spir_func ptr addrspace(3) @_Z8to_localPv(ptr addrspace(4) %var2) %P = call spir_func ptr @_Z10to_privatePv(ptr addrspace(4) %var3) + store i32 0, ptr addrspace(1) %G, align 4 + store i8 0, ptr addrspace(3) %L, align 1 + store i32 0, ptr %P, align 4 ret void } @@ -114,6 +123,12 @@ entry: %GE = call spir_func ptr addrspace(1) @__spirv_GenericCastToPtrExplicit_ToGlobal(ptr addrspace(4) %var1, i32 5) %LE = call spir_func ptr addrspace(3) @__spirv_GenericCastToPtrExplicit_ToLocal(ptr addrspace(4) %var2, i32 4) %PE = call spir_func ptr @__spirv_GenericCastToPtrExplicit_ToPrivate(ptr addrspace(4) %var3, i32 7) + store i32 0, ptr addrspace(1) %G, align 4 + store i8 0, ptr addrspace(3) %L, align 1 + store i32 0, ptr %P, align 4 + store i32 0, ptr addrspace(1) %GE, align 4 + store i8 0, ptr addrspace(3) %LE, align 1 + store i32 0, ptr %PE, align 4 ret void } @@ -139,6 +154,9 @@ entry: %G = call spir_func ptr addrspace(1) @to_global(ptr addrspace(4) %var1) %L = call spir_func ptr addrspace(3) @to_local(ptr addrspace(4) %var2) %P = call spir_func ptr @to_private(ptr addrspace(4) %var3) + store i32 0, ptr addrspace(1) %G, align 4 + store i8 0, ptr addrspace(3) %L, align 1 + store i32 0, ptr %P, align 4 ret void } diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpPtrCastToGeneric.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpPtrCastToGeneric.ll index 818243ab19e41..9f08a65c16866 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpPtrCastToGeneric.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpPtrCastToGeneric.ll @@ -16,9 +16,13 @@ ; CHECK-SPIRV: OpGenericCastToPtr %[[#LocalCharPtr]] %[[#Ptr2]] ; CHECK-SPIRV: OpFunctionEnd +@G_p = global ptr addrspace(3) null +@G_p2 = global ptr addrspace(3) null + define spir_kernel void @foo(ptr addrspace(1) %arg) { entry: %p = addrspacecast ptr addrspace(1) %arg to ptr addrspace(3) + store ptr addrspace(3) %p, ptr @G_p ret void } @@ -26,5 +30,6 @@ define spir_kernel void @bar(ptr addrspace(1) %arg) { entry: %p1 = addrspacecast ptr addrspace(1) %arg to ptr addrspace(4) %p2 = addrspacecast ptr addrspace(4) %p1 to ptr addrspace(3) + store ptr addrspace(3) %p2, ptr @G_p2 ret void } diff --git a/llvm/test/CodeGen/SPIRV/transcoding/fcmp.ll b/llvm/test/CodeGen/SPIRV/transcoding/fcmp.ll index 46eaba9d5ceb1..c752e278927a9 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/fcmp.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/fcmp.ll @@ -184,6 +184,8 @@ ; CHECK-SPIRV: %[[#r89]] = OpUnordered %[[#bool]] ; CHECK-SPIRV: %[[#r90]] = OpUnordered %[[#bool]] +@G = global [90 x i1] zeroinitializer + define spir_kernel void @testFCmp(float %a, float %b) local_unnamed_addr { entry: %r1 = fcmp oeq float %a, %b @@ -276,5 +278,185 @@ entry: %r88 = fcmp uno float %a, %b %r89 = fcmp ninf uno float %a, %b %r90 = fcmp nsz uno float %a, %b + %p1 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 0 + store i1 %r1, ptr %p1 + %p2 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 1 + store i1 %r2, ptr %p2 + %p3 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 2 + store i1 %r3, ptr %p3 + %p4 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 3 + store i1 %r4, ptr %p4 + %p5 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 4 + store i1 %r5, ptr %p5 + %p6 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 5 + store i1 %r6, ptr %p6 + %p7 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 6 + store i1 %r7, ptr %p7 + %p8 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 7 + store i1 %r8, ptr %p8 + %p9 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 8 + store i1 %r9, ptr %p9 + %p10 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 9 + store i1 %r10, ptr %p10 + %p11 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 10 + store i1 %r11, ptr %p11 + %p12 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 11 + store i1 %r12, ptr %p12 + %p13 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 12 + store i1 %r13, ptr %p13 + %p14 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 13 + store i1 %r14, ptr %p14 + %p15 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 14 + store i1 %r15, ptr %p15 + %p16 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 15 + store i1 %r16, ptr %p16 + %p17 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 16 + store i1 %r17, ptr %p17 + %p18 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 17 + store i1 %r18, ptr %p18 + %p19 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 18 + store i1 %r19, ptr %p19 + %p20 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 19 + store i1 %r20, ptr %p20 + %p21 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 20 + store i1 %r21, ptr %p21 + %p22 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 21 + store i1 %r22, ptr %p22 + %p23 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 22 + store i1 %r23, ptr %p23 + %p24 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 23 + store i1 %r24, ptr %p24 + %p25 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 24 + store i1 %r25, ptr %p25 + %p26 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 25 + store i1 %r26, ptr %p26 + %p27 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 26 + store i1 %r27, ptr %p27 + %p28 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 27 + store i1 %r28, ptr %p28 + %p29 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 28 + store i1 %r29, ptr %p29 + %p30 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 29 + store i1 %r30, ptr %p30 + %p31 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 30 + store i1 %r31, ptr %p31 + %p32 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 31 + store i1 %r32, ptr %p32 + %p33 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 32 + store i1 %r33, ptr %p33 + %p34 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 33 + store i1 %r34, ptr %p34 + %p35 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 34 + store i1 %r35, ptr %p35 + %p36 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 35 + store i1 %r36, ptr %p36 + %p37 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 36 + store i1 %r37, ptr %p37 + %p38 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 37 + store i1 %r38, ptr %p38 + %p39 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 38 + store i1 %r39, ptr %p39 + %p40 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 39 + store i1 %r40, ptr %p40 + %p41 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 40 + store i1 %r41, ptr %p41 + %p42 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 41 + store i1 %r42, ptr %p42 + %p43 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 42 + store i1 %r43, ptr %p43 + %p44 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 43 + store i1 %r44, ptr %p44 + %p45 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 44 + store i1 %r45, ptr %p45 + %p46 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 45 + store i1 %r46, ptr %p46 + %p47 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 46 + store i1 %r47, ptr %p47 + %p48 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 47 + store i1 %r48, ptr %p48 + %p49 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 48 + store i1 %r49, ptr %p49 + %p50 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 49 + store i1 %r50, ptr %p50 + %p51 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 50 + store i1 %r51, ptr %p51 + %p52 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 51 + store i1 %r52, ptr %p52 + %p53 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 52 + store i1 %r53, ptr %p53 + %p54 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 53 + store i1 %r54, ptr %p54 + %p55 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 54 + store i1 %r55, ptr %p55 + %p56 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 55 + store i1 %r56, ptr %p56 + %p57 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 56 + store i1 %r57, ptr %p57 + %p58 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 57 + store i1 %r58, ptr %p58 + %p59 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 58 + store i1 %r59, ptr %p59 + %p60 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 59 + store i1 %r60, ptr %p60 + %p61 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 60 + store i1 %r61, ptr %p61 + %p62 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 61 + store i1 %r62, ptr %p62 + %p63 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 62 + store i1 %r63, ptr %p63 + %p64 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 63 + store i1 %r64, ptr %p64 + %p65 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 64 + store i1 %r65, ptr %p65 + %p66 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 65 + store i1 %r66, ptr %p66 + %p67 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 66 + store i1 %r67, ptr %p67 + %p68 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 67 + store i1 %r68, ptr %p68 + %p69 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 68 + store i1 %r69, ptr %p69 + %p70 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 69 + store i1 %r70, ptr %p70 + %p71 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 70 + store i1 %r71, ptr %p71 + %p72 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 71 + store i1 %r72, ptr %p72 + %p73 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 72 + store i1 %r73, ptr %p73 + %p74 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 73 + store i1 %r74, ptr %p74 + %p75 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 74 + store i1 %r75, ptr %p75 + %p76 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 75 + store i1 %r76, ptr %p76 + %p77 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 76 + store i1 %r77, ptr %p77 + %p78 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 77 + store i1 %r78, ptr %p78 + %p79 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 78 + store i1 %r79, ptr %p79 + %p80 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 79 + store i1 %r80, ptr %p80 + %p81 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 80 + store i1 %r81, ptr %p81 + %p82 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 81 + store i1 %r82, ptr %p82 + %p83 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 82 + store i1 %r83, ptr %p83 + %p84 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 83 + store i1 %r84, ptr %p84 + %p85 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 84 + store i1 %r85, ptr %p85 + %p86 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 85 + store i1 %r86, ptr %p86 + %p87 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 86 + store i1 %r87, ptr %p87 + %p88 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 87 + store i1 %r88, ptr %p88 + %p89 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 88 + store i1 %r89, ptr %p89 + %p90 = getelementptr inbounds [90 x i1], ptr @G, i32 0, i32 89 + store i1 %r90, ptr %p90 ret void } diff --git a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll index c8691c32710ad..7658362773218 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll @@ -31,9 +31,12 @@ %StructEvent = type { target("spirv.Event") } +@G_r = global target("spirv.Event") poison + define spir_kernel void @test_half(ptr addrspace(3) %_arg1, ptr addrspace(1) %_arg2) { entry: %r = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyjPU3AS3Dv2_DF16_PU3AS1KS_mm9ocl_event(i32 2, ptr addrspace(3) %_arg1, ptr addrspace(1) %_arg2, i64 16, i64 10, target("spirv.Event") zeroinitializer) + store target("spirv.Event") %r, ptr @G_r ret void } @@ -42,7 +45,6 @@ declare dso_local spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyjPU ; CHECK: OpFunction ; CHECK: OpFunctionParameter ; CHECK: %[[#Src:]] = OpFunctionParameter -; CHECK: OpVariable %[[#TyStructPtr]] Function ; CHECK: %[[#EventVar:]] = OpVariable %[[#TyEventPtr]] Function ; CHECK: %[[#Dest:]] = OpInBoundsPtrAccessChain ; CHECK: %[[#CopyRes:]] = OpGroupAsyncCopy %[[#TyEvent]] %[[#]] %[[#Dest]] %[[#Src]] %[[#]] %[[#]] %[[#ConstEvent]] diff --git a/llvm/test/CodeGen/SPIRV/uitofp-with-bool.ll b/llvm/test/CodeGen/SPIRV/uitofp-with-bool.ll index 46668645f418b..9c8b4070d834d 100644 --- a/llvm/test/CodeGen/SPIRV/uitofp-with-bool.ll +++ b/llvm/test/CodeGen/SPIRV/uitofp-with-bool.ll @@ -68,6 +68,27 @@ ; SPV-DAG: %[[#ones_64:]] = OpConstantComposite %[[#vec_64]] %[[#one_64]] %[[#one_64]] ; SPV-DAG: %[[#pointer:]] = OpTypePointer CrossWorkgroup %[[#float]] +@G_s1 = global i8 0 +@G_s2 = global i16 0 +@G_s3 = global i32 0 +@G_s4 = global i64 0 +@G_s5 = global <2 x i8> zeroinitializer +@G_s6 = global <2 x i16> zeroinitializer +@G_s7 = global <2 x i32> zeroinitializer +@G_s8 = global <2 x i64> zeroinitializer +@G_z1 = global i8 0 +@G_z2 = global i16 0 +@G_z3 = global i32 0 +@G_z4 = global i64 0 +@G_z5 = global <2 x i8> zeroinitializer +@G_z6 = global <2 x i16> zeroinitializer +@G_z7 = global <2 x i32> zeroinitializer +@G_z8 = global <2 x i64> zeroinitializer +@G_ufp1 = global float 0.0 +@G_ufp2 = global <2 x float> zeroinitializer +@G_sfp1 = global float 0.0 +@G_sfp2 = global <2 x float> zeroinitializer + ; SPV-DAG: OpFunction ; SPV-DAG: %[[#A:]] = OpFunctionParameter %[[#pointer]] ; SPV-DAG: %[[#B:]] = OpFunctionParameter %[[#]] @@ -87,47 +108,67 @@ entry: ; SPV-DAG: %[[#s1]] = OpSelect %[[#int_8]] %[[#i1s]] %[[#mone_8]] %[[#zero_8]] %s1 = sext i1 %i1s to i8 + store i8 %s1, ptr @G_s1 ; SPV-DAG: %[[#s2]] = OpSelect %[[#int_16]] %[[#i1s]] %[[#mone_16]] %[[#zero_16]] %s2 = sext i1 %i1s to i16 + store i16 %s2, ptr @G_s2 ; SPV-DAG: %[[#s3]] = OpSelect %[[#int_32]] %[[#i1s]] %[[#mone_32]] %[[#zero_32]] %s3 = sext i1 %i1s to i32 + store i32 %s3, ptr @G_s3 ; SPV-DAG: %[[#s4]] = OpSelect %[[#int_64]] %[[#i1s]] %[[#mone_64]] %[[#zero_64]] %s4 = sext i1 %i1s to i64 + store i64 %s4, ptr @G_s4 ; SPV-DAG: %[[#s5]] = OpSelect %[[#vec_8]] %[[#i1v]] %[[#mones_8]] %[[#zeros_8]] %s5 = sext <2 x i1> %i1v to <2 x i8> + store <2 x i8> %s5, ptr @G_s5 ; SPV-DAG: %[[#s6]] = OpSelect %[[#vec_16]] %[[#i1v]] %[[#mones_16]] %[[#zeros_16]] %s6 = sext <2 x i1> %i1v to <2 x i16> + store <2 x i16> %s6, ptr @G_s6 ; SPV-DAG: %[[#s7]] = OpSelect %[[#vec_32]] %[[#i1v]] %[[#mones_32]] %[[#zeros_32]] %s7 = sext <2 x i1> %i1v to <2 x i32> + store <2 x i32> %s7, ptr @G_s7 ; SPV-DAG: %[[#s8]] = OpSelect %[[#vec_64]] %[[#i1v]] %[[#mones_64]] %[[#zeros_64]] %s8 = sext <2 x i1> %i1v to <2 x i64> + store <2 x i64> %s8, ptr @G_s8 ; SPV-DAG: %[[#z1]] = OpSelect %[[#int_8]] %[[#i1s]] %[[#one_8]] %[[#zero_8]] %z1 = zext i1 %i1s to i8 + store i8 %z1, ptr @G_z1 ; SPV-DAG: %[[#z2]] = OpSelect %[[#int_16]] %[[#i1s]] %[[#one_16]] %[[#zero_16]] %z2 = zext i1 %i1s to i16 + store i16 %z2, ptr @G_z2 ; SPV-DAG: %[[#z3]] = OpSelect %[[#int_32]] %[[#i1s]] %[[#one_32]] %[[#zero_32]] %z3 = zext i1 %i1s to i32 + store i32 %z3, ptr @G_z3 ; SPV-DAG: %[[#z4]] = OpSelect %[[#int_64]] %[[#i1s]] %[[#one_64]] %[[#zero_64]] %z4 = zext i1 %i1s to i64 + store i64 %z4, ptr @G_z4 ; SPV-DAG: %[[#z5]] = OpSelect %[[#vec_8]] %[[#i1v]] %[[#ones_8]] %[[#zeros_8]] %z5 = zext <2 x i1> %i1v to <2 x i8> + store <2 x i8> %z5, ptr @G_z5 ; SPV-DAG: %[[#z6]] = OpSelect %[[#vec_16]] %[[#i1v]] %[[#ones_16]] %[[#zeros_16]] %z6 = zext <2 x i1> %i1v to <2 x i16> + store <2 x i16> %z6, ptr @G_z6 ; SPV-DAG: %[[#z7]] = OpSelect %[[#vec_32]] %[[#i1v]] %[[#ones_32]] %[[#zeros_32]] %z7 = zext <2 x i1> %i1v to <2 x i32> + store <2 x i32> %z7, ptr @G_z7 ; SPV-DAG: %[[#z8]] = OpSelect %[[#vec_64]] %[[#i1v]] %[[#ones_64]] %[[#zeros_64]] %z8 = zext <2 x i1> %i1v to <2 x i64> + store <2 x i64> %z8, ptr @G_z8 ; SPV-DAG: %[[#ufp1_res:]] = OpSelect %[[#int_32]] %[[#i1s]] %[[#one_32]] %[[#zero_32]] ; SPV-DAG: %[[#ufp1]] = OpConvertUToF %[[#float]] %[[#ufp1_res]] %ufp1 = uitofp i1 %i1s to float + store float %ufp1, ptr @G_ufp1 ; SPV-DAG: %[[#ufp2_res:]] = OpSelect %[[#vec_32]] %[[#i1v]] %[[#ones_32]] %[[#zeros_32]] ; SPV-DAG: %[[#ufp2]] = OpConvertUToF %[[#vec_float]] %[[#ufp2_res]] %ufp2 = uitofp <2 x i1> %i1v to <2 x float> + store <2 x float> %ufp2, ptr @G_ufp2 ; SPV-DAG: %[[#sfp1_res:]] = OpSelect %[[#int_32]] %[[#i1s]] %[[#one_32]] %[[#zero_32]] ; SPV-DAG: %[[#sfp1]] = OpConvertSToF %[[#float]] %[[#sfp1_res]] %sfp1 = sitofp i1 %i1s to float + store float %sfp1, ptr @G_sfp1 ; SPV-DAG: %[[#sfp2_res:]] = OpSelect %[[#vec_32]] %[[#i1v]] %[[#ones_32]] %[[#zeros_32]] ; SPV-DAG: %[[#sfp2]] = OpConvertSToF %[[#vec_float]] %[[#sfp2_res]] %sfp2 = sitofp <2 x i1> %i1v to <2 x float> + store <2 x float> %sfp2, ptr @G_sfp2 ret void } diff --git a/llvm/test/DebugInfo/RISCV/relax_dwo_ranges.ll b/llvm/test/DebugInfo/RISCV/relax_dwo_ranges.ll index ab76ce04dcdb0..3916a205dd19c 100644 --- a/llvm/test/DebugInfo/RISCV/relax_dwo_ranges.ll +++ b/llvm/test/DebugInfo/RISCV/relax_dwo_ranges.ll @@ -20,68 +20,81 @@ ; RUN: llc -dwarf-version=5 -split-dwarf-file=foo.dwo -O0 -mtriple=riscv64-unknown-linux-gnu -filetype=obj relax_dwo_ranges.ll -o %t.o ; RUN: llvm-dwarfdump -v %t.o | FileCheck --check-prefix=DWARF5 %s -; RUN: llvm-dwarfdump --debug-info %t.o 2> %t.txt -; RUN: FileCheck --input-file=%t.txt %s --check-prefix=RELOCS --implicit-check-not=warning: +; RUN: llvm-dwarfdump --debug-info %t.o > /dev/null 2>&1 | count 0 +; RUN: llvm-objdump -h %t.o | FileCheck --check-prefix=HDR %s ; RUN: llc -dwarf-version=4 -split-dwarf-file=foo.dwo -O0 -mtriple=riscv64-unknown-linux-gnu -filetype=obj relax_dwo_ranges.ll -o %t.o ; RUN: llvm-dwarfdump -v %t.o | FileCheck --check-prefix=DWARF4 %s -; RUN: llvm-dwarfdump --debug-info %t.o 2> %t.txt -; RUN: FileCheck --input-file=%t.txt %s --check-prefix=RELOCS --implicit-check-not=warning: +; RUN: llvm-dwarfdump --debug-info %t.o > /dev/null 2>&1 | count 0 +; RUN: llvm-objdump -h %t.o | FileCheck --check-prefix=HDR %s -; Currently, square() still uses an offset to represent the function's end address, -; which requires a relocation here. -; RELOCS: warning: unexpected relocations for dwo section '.debug_info.dwo' +; Make sure we don't produce any relocations in any .dwo section +; HDR-NOT: .rela.{{.*}}.dwo +; Ensure that 'square()' function uses indexed start and end addresses ; DWARF5: .debug_info.dwo contents: ; DWARF5: DW_TAG_subprogram -; DWARF5-NEXT: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x0000000000000000 ".text") -; DWARF5-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000000) -; DWARF5: DW_AT_name {{.*}} "square") +; DWARF5-NEXT: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x0000000000000000 ".text") +; DWARF5-NEXT: DW_AT_high_pc [DW_FORM_addrx] (indexed (00000001) address = 0x000000000000002c ".text") +; DWARF5: DW_AT_name {{.*}} "square") ; DWARF5: DW_TAG_formal_parameter +; HDR-NOT: .rela.{{.*}}.dwo + ; Ensure there is no unnecessary addresses in .o file ; DWARF5: .debug_addr contents: ; DWARF5: Addrs: [ ; DWARF5-NEXT: 0x0000000000000000 ; DWARF5-NEXT: 0x000000000000002c +; DWARF5-NEXT: 0x000000000000002c ; DWARF5-NEXT: 0x000000000000003e ; DWARF5-NEXT: 0x000000000000006e ; DWARF5-NEXT: ] +; HDR-NOT: .rela.{{.*}}.dwo + ; Ensure that 'boo()' and 'main()' use DW_RLE_startx_length and DW_RLE_startx_endx ; entries respectively ; DWARF5: .debug_rnglists.dwo contents: ; DWARF5: ranges: -; DWARF5-NEXT: 0x00000014: [DW_RLE_startx_length]: 0x0000000000000001, 0x0000000000000012 => [0x000000000000002c, 0x000000000000003e) +; DWARF5-NEXT: 0x00000014: [DW_RLE_startx_length]: 0x0000000000000002, 0x0000000000000012 => [0x000000000000002c, 0x000000000000003e) ; DWARF5-NEXT: 0x00000017: [DW_RLE_end_of_list ] -; DWARF5-NEXT: 0x00000018: [DW_RLE_startx_endx ]: 0x0000000000000002, 0x0000000000000003 => [0x000000000000003e, 0x000000000000006e) +; DWARF5-NEXT: 0x00000018: [DW_RLE_startx_endx ]: 0x0000000000000003, 0x0000000000000004 => [0x000000000000003e, 0x000000000000006e) ; DWARF5-NEXT: 0x0000001b: [DW_RLE_end_of_list ] ; DWARF5-EMPTY: +; HDR-NOT: .rela.{{.*}}.dwo + ; DWARF4: .debug_info.dwo contents: ; DWARF4: DW_TAG_subprogram -; DWARF4-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000000) address = 0x0000000000000000 ".text") -; DWARF4-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000000) -; DWARF4: DW_AT_name {{.*}} "square") +; DWARF4-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000000) address = 0x0000000000000000 ".text") +; DWARF4-NEXT: DW_AT_high_pc [DW_FORM_GNU_addr_index] (indexed (00000001) address = 0x000000000000002c ".text") +; DWARF4: DW_AT_name {{.*}} "square") ; DWARF4: DW_TAG_subprogram -; DWARF4-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000001) address = 0x000000000000002c ".text") +; DWARF4-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000002) address = 0x000000000000002c ".text") ; DWARF4-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000012) -; DWARF4: DW_AT_name {{.*}} "boo") +; DWARF4: DW_AT_name {{.*}} "boo") ; DWARF4: DW_TAG_subprogram -; DWARF4-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000002) address = 0x000000000000003e ".text") -; DWARF4-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000000) -; DWARF4: DW_AT_name {{.*}} "main") +; DWARF4-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000003) address = 0x000000000000003e ".text") +; DWARF4-NEXT: DW_AT_high_pc [DW_FORM_GNU_addr_index] (indexed (00000004) address = 0x000000000000006e ".text") +; DWARF4: DW_AT_name {{.*}} "main") + +; HDR-NOT: .rela.{{.*}}.dwo ; Ensure there is no unnecessary addresses in .o file ; DWARF4: .debug_addr contents: ; DWARF4: Addrs: [ ; DWARF4-NEXT: 0x0000000000000000 ; DWARF4-NEXT: 0x000000000000002c +; DWARF4-NEXT: 0x000000000000002c ; DWARF4-NEXT: 0x000000000000003e +; DWARF4-NEXT: 0x000000000000006e ; DWARF4-NEXT: ] +; HDR-NOT: .rela.{{.*}}.dwo + #--- relax_dwo_ranges.cpp __attribute__((noinline)) int boo(); diff --git a/llvm/test/TableGen/DuplicateFieldValues.td b/llvm/test/TableGen/DuplicateFieldValues.td index 50c77fa88ccec..85cb5bbfb6c56 100644 --- a/llvm/test/TableGen/DuplicateFieldValues.td +++ b/llvm/test/TableGen/DuplicateFieldValues.td @@ -82,3 +82,4 @@ let BaseName = "0" in { def E0 : I, ABCRel, isEForm; } +defm : RemapAllTargetPseudoPointerOperands; diff --git a/llvm/test/TableGen/RegClassByHwMode.td b/llvm/test/TableGen/RegClassByHwMode.td index a21a396f7fd52..ec723f8b70478 100644 --- a/llvm/test/TableGen/RegClassByHwMode.td +++ b/llvm/test/TableGen/RegClassByHwMode.td @@ -13,6 +13,7 @@ include "llvm/Target/Target.td" // INSTRINFO-EMPTY: // INSTRINFO-NEXT: enum { // INSTRINFO-NEXT: PHI +// INSTRINFO: LOAD_STACK_GUARD = [[LOAD_STACK_GUARD_OPCODE:[0-9]+]] // INSTRINFO: }; // INSTRINFO: enum RegClassByHwModeUses : uint16_t { // INSTRINFO-NEXT: MyPtrRC, @@ -22,10 +23,20 @@ include "llvm/Target/Target.td" // INSTRINFO-EMPTY: // INSTRINFO-NEXT: } // namespace llvm::MyTarget + +// INSTRINFO: { [[LOAD_STACK_GUARD_OPCODE]], 1, 1, 0, 0, 0, 0, [[LOAD_STACK_GUARD_OP_INDEX:[0-9]+]], MyTargetImpOpBase + 0, 0|(1ULL<; +defm : RemapAllTargetPseudoPointerOperands; + def MyTargetISA : InstrInfo; def MyTarget : Target { let InstructionSet = MyTargetISA; } diff --git a/llvm/test/TableGen/def-multiple-operands.td b/llvm/test/TableGen/def-multiple-operands.td index 5d215056920e8..dc5ea09eff9ba 100644 --- a/llvm/test/TableGen/def-multiple-operands.td +++ b/llvm/test/TableGen/def-multiple-operands.td @@ -35,3 +35,5 @@ def InstA : Instruction { field bits<8> SoftFail = 0; let hasSideEffects = false; } + +defm : RemapAllTargetPseudoPointerOperands; diff --git a/llvm/test/TableGen/get-named-operand-idx.td b/llvm/test/TableGen/get-named-operand-idx.td index b3569510dd6fc..7982822c0a895 100644 --- a/llvm/test/TableGen/get-named-operand-idx.td +++ b/llvm/test/TableGen/get-named-operand-idx.td @@ -48,6 +48,8 @@ def InstD : InstBase { let UseNamedOperandTable = 0; } +defm : RemapAllTargetPseudoPointerOperands; + // CHECK-LABEL: #ifdef GET_INSTRINFO_OPERAND_ENUM // CHECK-NEXT: #undef GET_INSTRINFO_OPERAND_ENUM // CHECK-EMPTY: diff --git a/llvm/test/TableGen/get-operand-type-no-expand.td b/llvm/test/TableGen/get-operand-type-no-expand.td index a0a8fa957f9b6..fcaf3684528b2 100644 --- a/llvm/test/TableGen/get-operand-type-no-expand.td +++ b/llvm/test/TableGen/get-operand-type-no-expand.td @@ -46,3 +46,5 @@ def InstA : Instruction { // CHECK-NOEXPAND: /* InstA */ // CHECK-NOEXPAND-NEXT: i512complex, i8complex, i32imm, // CHECK-NOEXPAND: #endif // GET_INSTRINFO_OPERAND_TYPE + +defm : RemapAllTargetPseudoPointerOperands; diff --git a/llvm/test/TableGen/get-operand-type.td b/llvm/test/TableGen/get-operand-type.td index b2f63cafd6a89..49fbb63ac5974 100644 --- a/llvm/test/TableGen/get-operand-type.td +++ b/llvm/test/TableGen/get-operand-type.td @@ -18,6 +18,8 @@ def OpB : Operand; def RegOp : RegisterOperand; +defm : RemapAllTargetPseudoPointerOperands; + def InstA : Instruction { let Size = 1; let OutOperandList = (outs OpA:$a); diff --git a/llvm/test/TableGen/target-specialized-pseudos.td b/llvm/test/TableGen/target-specialized-pseudos.td index 99c63f3ec29d9..3953a36101fe0 100644 --- a/llvm/test/TableGen/target-specialized-pseudos.td +++ b/llvm/test/TableGen/target-specialized-pseudos.td @@ -1,6 +1,11 @@ -// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s -DONECASE -o - | FileCheck -check-prefixes=CHECK,ONECASE %s // RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s -DALLCASES -o - | FileCheck -check-prefixes=CHECK,ALLCASES %s -// RUN: not llvm-tblgen -gen-instr-info -I %p/../../include %s -DERROR -o /dev/null 2>&1 | FileCheck -check-prefix=ERROR %s +// RUN: not llvm-tblgen -gen-instr-info -I %p/../../include %s -DONECASE -o /dev/null 2>&1 | FileCheck -check-prefixes=ERROR-MISSING %s +// RUN: not llvm-tblgen -gen-instr-info -I %p/../../include %s -DMULTIPLE_OVERRIDE_ERROR -o /dev/null 2>&1 | FileCheck -implicit-check-not=error: -check-prefix=MULTIPLE-OVERRIDE-ERROR %s +// RUN: not llvm-tblgen -gen-instr-info -I %p/../../include %s -DALLCASES -DERROR_NONPSEUDO -o /dev/null 2>&1 | FileCheck -implicit-check-not=error: -check-prefix=ERROR-NONPSEUDO %s + + +// def PREALLOCATED_ARG : StandardPseudoInstruction { + // CHECK: namespace llvm::MyTarget { // CHECK: enum { @@ -20,8 +25,6 @@ // CHECK-NEXT: { [[MY_MOV_OPCODE]], 2, 1, 2, 0, 0, 0, {{[0-9]+}}, MyTargetImpOpBase + 0, 0|(1ULL<; #endif -#ifdef ERROR +#ifdef MULTIPLE_OVERRIDE_ERROR def MY_LOAD_STACK_GUARD_0 : TargetSpecializedStandardPseudoInstruction; -// ERROR: :[[@LINE+1]]:5: error: multiple overrides of 'LOAD_STACK_GUARD' defined +// MULTIPLE-OVERRIDE-ERROR: :[[@LINE+1]]:5: error: multiple overrides of 'LOAD_STACK_GUARD' defined def MY_LOAD_STACK_GUARD_1 : TargetSpecializedStandardPseudoInstruction; #endif +#ifdef ERROR_NONPSEUDO + +// FIXME: Double error +// ERROR-NONPSEUDO: [[@LINE+2]]:5: error: non-pseudoinstruction user of PointerLikeRegClass +// ERROR-NONPSEUDO: [[@LINE+1]]:5: error: non-pseudoinstruction user of PointerLikeRegClass +def NON_PSEUDO : TestInstruction { + let OutOperandList = (outs XRegs:$dst); + let InOperandList = (ins ptr_rc:$src); + let AsmString = "non_pseudo $dst, $src"; +} + +#endif + def MY_MOV : TestInstruction { let OutOperandList = (outs XRegs:$dst); let InOperandList = (ins XRegs:$src); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll index 23918427e7003..95b4dcb23dd47 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll @@ -14,7 +14,7 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32> @@ -22,7 +22,7 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) { ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 9609982b2c68f..2e3ddc8899ec7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -63,7 +63,7 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] ; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i32 2 +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i64 2 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8 ; DEFAULT-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]]) @@ -259,7 +259,7 @@ define void @latch_branch_cost(ptr %dst) { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 16 +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 16 ; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 1 ; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll index 0a433ec76acf4..f0664197dcb94 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll @@ -137,7 +137,7 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 @@ -210,7 +210,7 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 % ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 @@ -279,7 +279,7 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr % ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16> @@ -287,7 +287,7 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr % ; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP0]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 16 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i64 16 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -413,7 +413,7 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[WIDE_LOAD]] to <4 x i1> @@ -427,15 +427,16 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP8]] to <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 4 ; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index 2180f18750bf2..580c568c373f1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -27,7 +27,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]], splat (i8 16) ; CHECK-NEXT: [[INDEX4:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[INDEX4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP8]], align 8 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll index a3b7392dd280f..549df337e6907 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll @@ -19,17 +19,17 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 48 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 32 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 48 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 32 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP9]], align 1 @@ -39,9 +39,9 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: [[TMP13:%.*]] = add <16 x i8> [[WIDE_LOAD7]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP14:%.*]] = add <16 x i8> [[WIDE_LOAD8]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 16 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 32 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 48 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 16 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 32 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 48 ; CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP15]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP17]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP13]], ptr [[TMP18]], align 1 @@ -54,7 +54,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 8 @@ -71,7 +71,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: store <8 x i8> [[TMP26]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 8 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] -; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -89,7 +89,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX6]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -128,17 +128,17 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 16 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 24 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 16 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i16>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 16 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 24 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i16>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i16>, ptr [[TMP9]], align 1 @@ -148,22 +148,22 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: [[TMP13:%.*]] = add <8 x i16> [[WIDE_LOAD7]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP14:%.*]] = add <8 x i16> [[WIDE_LOAD8]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 8 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 16 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 24 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 8 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 16 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 24 ; CHECK-NEXT: store <8 x i16> [[TMP11]], ptr [[TMP15]], align 1 ; CHECK-NEXT: store <8 x i16> [[TMP12]], ptr [[TMP17]], align 1 ; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr [[TMP18]], align 1 ; CHECK-NEXT: store <8 x i16> [[TMP14]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4 @@ -180,7 +180,7 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store <4 x i16> [[TMP26]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] -; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -198,7 +198,7 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX6]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -237,17 +237,17 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 12 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP9]], align 1 @@ -257,22 +257,22 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 8 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 12 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 8 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 12 ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP15]], align 1 ; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP17]], align 1 ; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP18]], align 1 ; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4 @@ -289,7 +289,7 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store <4 x i32> [[TMP26]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] -; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -307,7 +307,7 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX6]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -347,9 +347,9 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 16 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 32 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 48 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 32 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[ARG]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 @@ -358,9 +358,9 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[WIDE_LOAD4]], splat (i8 10) ; CHECK-NEXT: [[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD5]], splat (i8 10) ; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[WIDE_LOAD6]], splat (i8 10) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 16 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 32 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 48 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 16 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 32 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 48 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[ARG2]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP8]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1 @@ -369,7 +369,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -382,7 +382,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -397,7 +397,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK-NEXT: store i8 [[SELECT]], ptr [[GEP_B]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 20 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -447,16 +447,16 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 12 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 12 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP8]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP9]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -464,7 +464,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 4 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP12]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 [[TMP2]], 4 @@ -479,7 +479,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], 4 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC2]] -; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC2]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -491,7 +491,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: store i32 0, ptr [[IV]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV]], [[PTR_END]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 5e92123891b31..85726c161cc54 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -29,7 +29,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]]) -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -117,7 +117,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -201,7 +201,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -285,7 +285,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -399,7 +399,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 ; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP1]], align 1 ; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll index cb4e99332c04b..4eacc55a99f72 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll @@ -20,7 +20,7 @@ define void @epilogue_vectorization_fix_scalar_resume_values(ptr %dst, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP0]], align 1 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll index 35d7e2cc8c586..feb0175e75542 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll @@ -21,16 +21,16 @@ define double @fp128_fmuladd_reduction(ptr %start0, ptr %start1, ptr %end0, ptr ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[START0]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[START1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr fp128, ptr [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr fp128, ptr [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr fp128, ptr [[TMP1]], i32 6 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr fp128, ptr [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr fp128, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr fp128, ptr [[TMP1]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x fp128>, ptr [[TMP1]], align 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x fp128>, ptr [[TMP24]], align 16 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x fp128>, ptr [[TMP4]], align 16 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x fp128>, ptr [[TMP5]], align 16 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP3]], i32 4 -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP3]], i32 6 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP3]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP3]], align 16 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP28]], align 16 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP35]], align 16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll index c94b3a4c49555..c692ba5b06690 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll @@ -26,7 +26,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD1]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -34,7 +34,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i8> [[WIDE_LOAD1]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -119,7 +119,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR4:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT3]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD5]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -135,7 +135,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i8> [[TMP15]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i8> [[TMP16]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i32 16 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP19]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP18]], ptr [[TMP22]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll index faee4c1194018..591bdabca65e7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll @@ -56,9 +56,9 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 ; CHECK-UF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-UF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4 -; CHECK-UF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 8 -; CHECK-UF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 12 +; CHECK-UF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 4 +; CHECK-UF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 8 +; CHECK-UF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 12 ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP8]], <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP17]], <4 x i1> [[ACTIVE_LANE_MASK4]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP18]], <4 x i1> [[ACTIVE_LANE_MASK5]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll index 6902dd990509e..a04367f32dd01 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll @@ -53,7 +53,7 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) @@ -128,10 +128,10 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV]] ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC_0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[GEP_SRC_1]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll index 193424d3eb70a..0bddc498f9e83 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll @@ -53,7 +53,7 @@ define float @fminnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll index f15f04fe5f6f2..3a9d5c34bacab 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll @@ -20,17 +20,17 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -97,17 +97,17 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -174,17 +174,17 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -251,17 +251,17 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -328,17 +328,17 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i32 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x half>, ptr [[TMP6]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i32 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x half>, ptr [[TMP4]], align 2 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x half>, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[WIDE_LOAD]], <8 x half> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[WIDE_LOAD5]], <8 x half> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i32 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i64 8 ; CHECK-NEXT: store <8 x half> [[TMP11]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store <8 x half> [[TMP13]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -405,17 +405,17 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i32 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x half>, ptr [[TMP6]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i32 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x half>, ptr [[TMP4]], align 2 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x half>, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[WIDE_LOAD]], <8 x half> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[WIDE_LOAD5]], <8 x half> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i32 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i64 8 ; CHECK-NEXT: store <8 x half> [[TMP11]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store <8 x half> [[TMP13]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 56edee44fe3b1..21b21774d18cf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -62,7 +62,7 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; COST1: [[VECTOR_BODY]]: ; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; COST1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX]] -; COST1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 +; COST1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 ; COST1-NEXT: store <16 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1 ; COST1-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP0]], align 1 ; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -328,7 +328,7 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 { ; COST1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP1]], i64 0 ; COST1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer ; COST1-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]] -; COST1-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[TMP2]], i32 2 +; COST1-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[TMP2]], i64 2 ; COST1-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; COST1-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 42a1940925968..7b42e565e127d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -25,7 +25,7 @@ define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i64 4 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI]], splat (i32 -1) @@ -106,7 +106,7 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -161,7 +161,7 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -205,7 +205,7 @@ define void @wide_truncated_iv(ptr %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8 ; CHECK-NEXT: store <8 x i8> [[VEC_IND]], ptr [[TMP2]], align 1 ; CHECK-NEXT: store <8 x i8> [[STEP_ADD]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -287,7 +287,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i32> [[TMP7]] to <4 x i64> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll index 9b4151f30d640..f7060ec3512ac 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll @@ -35,9 +35,9 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4: vector.body: ; INTERLEAVE-4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; INTERLEAVE-4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16 -; INTERLEAVE-4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 32 -; INTERLEAVE-4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 48 +; INTERLEAVE-4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16 +; INTERLEAVE-4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 32 +; INTERLEAVE-4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 48 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 @@ -55,9 +55,9 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP15]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP19]] ; INTERLEAVE-4-NEXT: [[TMP24:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP20]] ; INTERLEAVE-4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; INTERLEAVE-4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 16 -; INTERLEAVE-4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 32 -; INTERLEAVE-4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 48 +; INTERLEAVE-4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 16 +; INTERLEAVE-4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 32 +; INTERLEAVE-4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 48 ; INTERLEAVE-4-NEXT: store <16 x i8> [[TMP21]], ptr [[TMP25]], align 1 ; INTERLEAVE-4-NEXT: store <16 x i8> [[TMP22]], ptr [[TMP30]], align 1 ; INTERLEAVE-4-NEXT: store <16 x i8> [[TMP23]], ptr [[TMP31]], align 1 @@ -70,7 +70,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; INTERLEAVE-4: vec.epilog.iter.check: ; INTERLEAVE-4-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; INTERLEAVE-4-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; INTERLEAVE-4-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; INTERLEAVE-4: vec.epilog.ph: ; INTERLEAVE-4-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; INTERLEAVE-4-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[N]], 8 @@ -91,7 +91,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: store <8 x i8> [[TMP39]], ptr [[TMP40]], align 1 ; INTERLEAVE-4-NEXT: [[INDEX_NEXT18]] = add nuw i64 [[INDEX12]], 8 ; INTERLEAVE-4-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT18]], [[N_VEC10]] -; INTERLEAVE-4-NEXT: br i1 [[TMP42]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; INTERLEAVE-4-NEXT: br i1 [[TMP42]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; INTERLEAVE-4: vec.epilog.middle.block: ; INTERLEAVE-4-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC10]] ; INTERLEAVE-4-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -109,7 +109,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: store i8 [[SEL]], ptr [[GEP_DST]], align 1 ; INTERLEAVE-4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; INTERLEAVE-4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; INTERLEAVE-4-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; INTERLEAVE-4-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; INTERLEAVE-4: exit: ; INTERLEAVE-4-NEXT: ret void ; @@ -137,7 +137,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2: vector.body: ; INTERLEAVE-2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; INTERLEAVE-2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; INTERLEAVE-2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; INTERLEAVE-2-NEXT: [[TMP7:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -147,7 +147,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP9]] ; INTERLEAVE-2-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP8]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP10]] ; INTERLEAVE-2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; INTERLEAVE-2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 16 +; INTERLEAVE-2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 16 ; INTERLEAVE-2-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP13]], align 1 ; INTERLEAVE-2-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP16]], align 1 ; INTERLEAVE-2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -158,7 +158,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; INTERLEAVE-2: vec.epilog.iter.check: ; INTERLEAVE-2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; INTERLEAVE-2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; INTERLEAVE-2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; INTERLEAVE-2: vec.epilog.ph: ; INTERLEAVE-2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; INTERLEAVE-2-NEXT: [[N_MOD_VF7:%.*]] = urem i64 [[N]], 8 @@ -179,7 +179,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: store <8 x i8> [[TMP23]], ptr [[TMP24]], align 1 ; INTERLEAVE-2-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX10]], 8 ; INTERLEAVE-2-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC8]] -; INTERLEAVE-2-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; INTERLEAVE-2-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; INTERLEAVE-2: vec.epilog.middle.block: ; INTERLEAVE-2-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[N]], [[N_VEC8]] ; INTERLEAVE-2-NEXT: br i1 [[CMP_N9]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index aa94763b44a30..53cb0653fd241 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -29,9 +29,9 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; INTERLEAVE-4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; INTERLEAVE-4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; INTERLEAVE-4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; INTERLEAVE-4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; INTERLEAVE-4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1 @@ -103,7 +103,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-2-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-2-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; INTERLEAVE-2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1 ; INTERLEAVE-2-NEXT: [[TMP2]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index ee3a4a04566c9..3eb42845bec4a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -36,12 +36,12 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD]], <8 x i16> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD4]], <8 x i16> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 8 ; CHECK-NEXT: store <8 x i16> [[TMP2]], ptr [[NEXT_GEP3]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[TMP4]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -160,12 +160,12 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD]], <16 x i8> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD3]], <16 x i8> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 16 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[NEXT_GEP2]], align 2 ; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll index 0a9494e4c7ade..c43d62404006d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll @@ -22,7 +22,7 @@ define void @licm_replicate_call(double %x, ptr %dst) { ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[TMP8]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index c768fec31a497..bdbf08aecf6b3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -103,7 +103,7 @@ define void @vectorize_without_optsize(ptr %p, i32 %x, i64 %n) { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -621,17 +621,17 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 8 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 8 +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; DEFAULT-NEXT: [[TMP7:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; DEFAULT-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i32 8 +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> @@ -737,17 +737,17 @@ define void @vectorization_forced_minsize_reduce_width() { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 8 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 8 +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; DEFAULT-NEXT: [[TMP7:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; DEFAULT-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i32 8 +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index 0ee6b52a2450b..3142227815383 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -61,13 +61,13 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP13]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP14:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -164,13 +164,13 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP13]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP14:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -223,11 +223,11 @@ define i32 @sudot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -259,13 +259,13 @@ define i32 @sudot_neon(ptr %a, ptr %b) #1 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -318,11 +318,11 @@ define i32 @usdot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -354,13 +354,13 @@ define i32 @usdot_neon(ptr %a, ptr %b) #1 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll index c6c5c5105d540..b2be0e1d7a442 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll @@ -44,11 +44,11 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -249,7 +249,7 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] -; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -513,13 +513,13 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) { ; CHECK-INTERLEAVED: vector.body: ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> @@ -791,10 +791,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]] -; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP38]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32> @@ -805,10 +805,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = mul nsw <16 x i32> [[TMP19]], [[TMP17]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE11]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP21]]) -; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD13:%.*]] = load <16 x i8>, ptr [[TMP22]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD14:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x i8>, ptr [[TMP26]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32> @@ -819,10 +819,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD15]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = mul nsw <16 x i32> [[TMP27]], [[TMP25]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP29]]) -; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD19:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD20:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD21:%.*]] = load <16 x i8>, ptr [[TMP36]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = sext <16 x i8> [[WIDE_LOAD18]] to <16 x i32> @@ -833,10 +833,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = sext <16 x i8> [[WIDE_LOAD21]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = mul nsw <16 x i32> [[TMP35]], [[TMP33]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP37]]) -; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD25:%.*]] = load <16 x i8>, ptr [[TMP42]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD26:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD27:%.*]] = load <16 x i8>, ptr [[TMP46]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = sext <16 x i8> [[WIDE_LOAD24]] to <16 x i32> @@ -1811,13 +1811,13 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index ab593f6f8bb6b..71eb5476b7ac5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -44,11 +44,11 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX1]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP20]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP20]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP20]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX1]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP28]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP28]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP28]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -162,16 +162,16 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE14:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[NEXT_GEP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1 @@ -299,16 +299,16 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly % ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX2]] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 8 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 24 +; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 24 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP0]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i16>, ptr [[TMP10]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 8 -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 24 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 8 +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 24 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[NEXT_GEP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i16>, ptr [[TMP18]], align 2 @@ -525,7 +525,7 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] -; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -789,13 +789,13 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED: vector.body: ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP10]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP10]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP17]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP17]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> @@ -1080,10 +1080,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]] -; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP43]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP12]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32> @@ -1094,10 +1094,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = mul nsw <16 x i32> [[TMP16]], [[TMP17]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE11]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP18]]) -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD13:%.*]] = load <16 x i8>, ptr [[TMP19]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD14:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x i8>, ptr [[TMP20]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32> @@ -1108,10 +1108,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD15]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP24]], [[TMP25]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP26]]) -; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD19:%.*]] = load <16 x i8>, ptr [[TMP27]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD20:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD21:%.*]] = load <16 x i8>, ptr [[TMP28]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = sext <16 x i8> [[WIDE_LOAD18]] to <16 x i32> @@ -1122,10 +1122,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = sext <16 x i8> [[WIDE_LOAD21]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = mul nsw <16 x i32> [[TMP48]], [[TMP33]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE23]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP34]]) -; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD25:%.*]] = load <16 x i8>, ptr [[TMP35]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD26:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD27:%.*]] = load <16 x i8>, ptr [[TMP36]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = sext <16 x i8> [[WIDE_LOAD24]] to <16 x i32> @@ -1450,13 +1450,13 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP3]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP8]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -1572,12 +1572,12 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP14]] -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP15]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i64> @@ -1880,7 +1880,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 8 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> @@ -2009,7 +2009,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 8 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> @@ -2146,10 +2146,10 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE6:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[NEXT_GEP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext <16 x i8> [[WIDE_LOAD4]] to <16 x i64> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll index bd9fae6cd610b..80edfb5f0b6ff 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll @@ -20,7 +20,7 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star ; IC2-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 +; IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; IC2-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; IC2-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -73,9 +73,9 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star ; IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 -; IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32 -; IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 48 +; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32 +; IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 48 ; IC4-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll index 672d19b1edeba..a439f5189794a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll @@ -16,13 +16,13 @@ define i32 @not_dotp(ptr %a, ptr %b) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll index 6dae09ef97e1c..1f5d5f0ea218e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll @@ -45,11 +45,11 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP7]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP7]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP14]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP14]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP14]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index 46ec858d7455c..dd2fe09a9e593 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -41,7 +41,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -134,7 +134,7 @@ define i32 @zext_add_reduc_i8_i32_neon(ptr %a) #2 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -223,7 +223,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i64> @@ -317,7 +317,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i32 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 8 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> @@ -413,9 +413,9 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 @@ -703,9 +703,9 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP1]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 @@ -807,7 +807,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -921,7 +921,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6]] = add <16 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] @@ -1042,7 +1042,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE2:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32> @@ -1156,9 +1156,9 @@ define i64 @sext_reduction_i32_to_i64(ptr %arr, i64 %n) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 -; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 8 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 12 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 4 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 12 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll index 5355a9772ef10..73dbefeb10413 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll @@ -15,7 +15,7 @@ define void @cost_hoisted_vector_code(ptr %p, float %arg) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index 68cfc659e1e94..fceab6f823d5a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -229,9 +229,9 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2) ; CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8 ; CHECK-NEXT: store ptr null, ptr [[TMP17]], align 8 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr ptr, ptr [[DST2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 6 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 6 ; CHECK-NEXT: store <2 x ptr> [[TMP0]], ptr [[TMP12]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP1]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[TMP14]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 1596b60f48567..bf4ab32fbf9e4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -142,7 +142,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16 +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 16 ; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] ; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1, !alias.scope [[META9]], !noalias [[META6]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll index 76a7536501bd6..389f91f878534 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll @@ -29,17 +29,17 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA510: [[VECTOR_BODY]]: ; CHECK-CA510-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-CA510-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]] -; CHECK-CA510-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 4 +; CHECK-CA510-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-CA510-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP2]] -; CHECK-CA510-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 4 +; CHECK-CA510-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-CA510-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD]] ; CHECK-CA510-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD5]] ; CHECK-CA510-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[TMP2]] -; CHECK-CA510-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 4 +; CHECK-CA510-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 4 ; CHECK-CA510-NEXT: store <4 x float> [[TMP9]], ptr [[TMP11]], align 4 ; CHECK-CA510-NEXT: store <4 x float> [[TMP10]], ptr [[TMP13]], align 4 ; CHECK-CA510-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 8 @@ -93,17 +93,17 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA520: [[VECTOR_BODY]]: ; CHECK-CA520-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-CA520-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]] -; CHECK-CA520-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 4 +; CHECK-CA520-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-CA520-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP2]] -; CHECK-CA520-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 4 +; CHECK-CA520-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-CA520-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD]] ; CHECK-CA520-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD5]] ; CHECK-CA520-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[TMP2]] -; CHECK-CA520-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 4 +; CHECK-CA520-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 4 ; CHECK-CA520-NEXT: store <4 x float> [[TMP9]], ptr [[TMP11]], align 4 ; CHECK-CA520-NEXT: store <4 x float> [[TMP10]], ptr [[TMP13]], align 4 ; CHECK-CA520-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 8 @@ -157,17 +157,17 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA320: [[VECTOR_BODY]]: ; CHECK-CA320-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-CA320-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]] -; CHECK-CA320-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-CA320-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-CA320-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]] -; CHECK-CA320-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i32 4 +; CHECK-CA320-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-CA320-NEXT: [[TMP6:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD]] ; CHECK-CA320-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD5]] ; CHECK-CA320-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]] -; CHECK-CA320-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i32 4 +; CHECK-CA320-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 4 ; CHECK-CA320-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-CA320-NEXT: store <4 x float> [[TMP7]], ptr [[TMP9]], align 4 ; CHECK-CA320-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll index 2a19402347e40..6eb8242bf7975 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll @@ -178,9 +178,9 @@ define void @test_interleave_store_one_constant(ptr noalias %src, ptr noalias %d ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP13]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP13]], i32 4 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP13]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP13]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP13]], align 8 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP15]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP16]], align 8 @@ -323,9 +323,9 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 2 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 6 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 2 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 4 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP23]], align 8 ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x double>, ptr [[TMP25]], align 8 ; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <2 x double>, ptr [[TMP26]], align 8 @@ -456,8 +456,9 @@ define void @test_interleave_after_narrowing(i32 %n, ptr %x, ptr noalias %y) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll index 46b0ebdd2fa62..99c735f777b66 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll @@ -88,7 +88,7 @@ define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE11]] ; VF2IC2: [[PRED_STORE_CONTINUE11]]: ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 2 +; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 2 ; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP6]], align 1 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -199,7 +199,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr % ; VF2IC2-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> ; VF2IC2-NEXT: store <4 x i64> [[INTERLEAVED_VEC5]], ptr [[TMP4]], align 8 ; VF2IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 2 +; VF2IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1 ; VF2IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP8]], align 1 ; VF2IC2-NEXT: [[TMP9:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll index d290f2d4f5bc3..b14b1783c97e3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll @@ -62,7 +62,7 @@ define void @test_2xi64_with_wide_load(ptr noalias %data, ptr noalias %factor) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll index 44b4e5a8c2bc7..4ede21040f393 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll @@ -17,9 +17,9 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i32 16 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i32 32 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i32 48 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 48 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[DST]], align 4 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 4 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 4 @@ -30,7 +30,7 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF0:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] @@ -49,7 +49,7 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: store i8 0, ptr [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[TC]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -69,7 +69,8 @@ exit: ret void } ;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[PROF0]] = !{!"branch_weights", i32 8, i32 56} +; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll index 2abc787061b53..ec874d0b48030 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll @@ -11,14 +11,14 @@ define void @vector_reverse_f64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-LABEL: vector_reverse_f64 ; CHECK-LABEL: vector.body -; CHECK: %[[GEP:.*]] = getelementptr inbounds double, ptr %{{.*}}, i32 0 -; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, ptr %[[GEP]], i32 -7 +; CHECK: %[[GEP:.*]] = getelementptr inbounds double, ptr %{{.*}}, i64 0 +; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, ptr %[[GEP]], i64 -7 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x double>, ptr %[[GEP1]], align 8 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x double> %[[WIDE]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: %[[FADD:.*]] = fadd <8 x double> %[[REVERSE]] ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds double, ptr {{.*}}, i64 {{.*}} -; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds double, ptr %[[GEP2]], i32 0 -; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds double, ptr %[[GEP3]], i32 -7 +; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds double, ptr %[[GEP2]], i64 0 +; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds double, ptr %[[GEP3]], i64 -7 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x double> %[[FADD]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: store <8 x double> %[[REVERSE6]], ptr %[[GEP4]], align 8 @@ -44,14 +44,14 @@ for.body: ; preds = %entry, %for.body define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-LABEL: vector_reverse_i64 ; CHECK-LABEL: vector.body -; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, ptr %{{.*}}, i32 0 -; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds i64, ptr %[[GEP]], i32 -7 +; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, ptr %{{.*}}, i64 0 +; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds i64, ptr %[[GEP]], i64 -7 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x i64>, ptr %[[GEP1]], align 8 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x i64> %[[WIDE]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: %[[FADD:.*]] = add <8 x i64> %[[REVERSE]] ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i64, ptr {{.*}}, i64 {{.*}} -; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds i64, ptr %[[GEP2]], i32 0 -; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds i64, ptr %[[GEP3]], i32 -7 +; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds i64, ptr %[[GEP2]], i64 0 +; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds i64, ptr %[[GEP3]], i64 -7 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x i64> %[[FADD]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: store <8 x i64> %[[REVERSE6]], ptr %[[GEP4]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll index 7afa8ce998121..e05332abcee61 100644 --- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll @@ -22,7 +22,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index abbd176a1df6e..478c9c1141949 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -51,17 +51,17 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[STEP_ADD_10:%.*]] = add <2 x i64> [[STEP_ADD_9]], splat (i64 2) ; CHECK-NEXT: [[STEP_ADD_11:%.*]] = add <2 x i64> [[STEP_ADD_10]], splat (i64 2) ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 6 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 10 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 12 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 14 -; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 18 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 20 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 22 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 6 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 10 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 12 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 14 +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 18 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 20 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 22 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <2 x i8>, ptr [[TMP12]], align 1 ; CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <2 x i8>, ptr [[TMP13]], align 1 @@ -193,6 +193,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[CMP_I166_I:%.*]] = icmp ult ptr [[PTR_IV]], [[END]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[IV]], [[N]] ; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP_I166_I]], i1 [[CMP2]], i1 false +; CHECK-NEXT: br i1 [[AND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP52]], %[[MIDDLE_BLOCK]] ], [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[RES:%.*]] = icmp eq i64 [[RED_NEXT_LCSSA]], 0 @@ -226,4 +227,6 @@ exit: ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[PROF3]] = !{!"branch_weights", i32 2, i32 22} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index 7677c9666455a..f1fbf1dd5d942 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -22,13 +22,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-TWO-CHECK: [[VECTOR_BODY]]: ; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 28 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP17]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP18]], align 4 @@ -38,13 +38,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP23]], align 4 ; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 28 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP24]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP33]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4 @@ -62,13 +62,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD15]] ; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD16]] ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 28 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP40]], ptr [[TMP48]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP41]], ptr [[TMP57]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP42]], ptr [[TMP58]], align 4 @@ -124,13 +124,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-FOUR-CHECK: [[VECTOR_BODY]]: ; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 28 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP17]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP18]], align 4 @@ -140,13 +140,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP23]], align 4 ; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 28 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP24]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP33]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4 @@ -164,13 +164,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-FOUR-CHECK-NEXT: [[TMP46:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD15]] ; VF-FOUR-CHECK-NEXT: [[TMP47:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD16]] ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 28 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP40]], ptr [[TMP48]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP41]], ptr [[TMP57]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP42]], ptr [[TMP58]], align 4 @@ -261,22 +261,22 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]] -; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0 -; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -4 -; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP58]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -8 -; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -12 -; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP62]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -16 -; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -20 -; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -24 -; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP68]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -28 -; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0 +; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4 +; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8 +; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12 +; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16 +; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20 +; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24 +; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -28 +; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4 @@ -302,13 +302,13 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 28 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP72]], ptr [[TMP80]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP73]], ptr [[TMP89]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP74]], ptr [[TMP90]], align 4 @@ -340,8 +340,8 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = sext i32 [[TMP100]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]] -; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i32 0 -; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -1 +; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0 +; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -1 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x float>, ptr [[TMP104]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <2 x float> [[WIDE_LOAD23]], <2 x float> poison, <2 x i32> ; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], splat (float 1.000000e+00) @@ -384,22 +384,22 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]] -; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0 -; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -4 -; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP58]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -8 -; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -12 -; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP62]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -16 -; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -20 -; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -24 -; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP68]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -28 -; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0 +; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4 +; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8 +; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12 +; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16 +; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20 +; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24 +; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -28 +; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4 @@ -425,13 +425,13 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) ; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) ; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 28 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP72]], ptr [[TMP80]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP73]], ptr [[TMP89]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP74]], ptr [[TMP90]], align 4 @@ -463,8 +463,8 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = sext i32 [[TMP100]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]] -; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i32 0 -; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0 +; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -3 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, ptr [[TMP104]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x float> [[WIDE_LOAD23]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], splat (float 1.000000e+00) diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll index d82a3cde4639a..dc9c154b3fe05 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll @@ -34,13 +34,13 @@ define void @test(ptr %arr, i32 %len) { ; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[ARR]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 6 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 8 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 10 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 12 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 14 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 14 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x double>, ptr [[TMP6]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll index 8d3026e63748a..1ae1ba6795c01 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll @@ -87,13 +87,13 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-V-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[WIDE_LOAD]] to <8 x i32> ; FIXED-V-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[WIDE_LOAD2]] to <8 x i32> ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-V-NEXT: [[TMP8:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -123,11 +123,11 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP8:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -280,13 +280,13 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-V-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i32> ; FIXED-V-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD2]] to <8 x i32> ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-V-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -316,11 +316,11 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -473,13 +473,13 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-V-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i32> ; FIXED-V-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD2]] to <8 x i32> ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-V-NEXT: [[TMP8:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -509,11 +509,11 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -665,13 +665,13 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-V-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[WIDE_LOAD]] to <8 x i32> ; FIXED-V-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[WIDE_LOAD2]] to <8 x i32> ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-V-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -701,11 +701,11 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll index 735fb769de8b9..671a929e6fa35 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll @@ -69,7 +69,7 @@ define i32 @sub(ptr %a, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP4]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP3]]) ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -116,7 +116,7 @@ define i32 @addsub(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP6]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP5]]) ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -166,7 +166,7 @@ define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -212,7 +212,7 @@ define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32( [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -258,7 +258,7 @@ define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -305,7 +305,7 @@ define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -353,7 +353,7 @@ define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -400,7 +400,7 @@ define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -444,7 +444,7 @@ define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "tar ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -483,14 +483,14 @@ define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) " ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2]] = fadd fast <16 x half> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP3]] = fadd fast <16 x half> [[WIDE_LOAD2]], [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x half> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[BIN_RDX]]) @@ -508,7 +508,7 @@ define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) " ; CHECK-NEXT: [[ADD]] = fadd fast half [[TMP6]], [[SUM_07]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi half [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret half [[ADD_LCSSA]] @@ -545,14 +545,14 @@ define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "targ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2]] = fadd fast <16 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP3]] = fadd fast <16 x bfloat> [[WIDE_LOAD2]], [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x bfloat> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> [[BIN_RDX]]) @@ -570,7 +570,7 @@ define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "targ ; CHECK-NEXT: [[ADD]] = fadd fast bfloat [[TMP6]], [[SUM_07]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi bfloat [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret bfloat [[ADD_LCSSA]] @@ -615,7 +615,7 @@ define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fmin.nxv4f32( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -661,7 +661,7 @@ define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) # ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call half @llvm.vector.reduce.fmin.nxv8f16( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -707,7 +707,7 @@ define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call bfloat @llvm.vector.reduce.fmin.nxv8bf16( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -755,7 +755,7 @@ define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -801,7 +801,7 @@ define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) # ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call fast half @llvm.vector.reduce.fmax.nxv8f16( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -847,7 +847,7 @@ define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call fast bfloat @llvm.vector.reduce.fmax.nxv8bf16( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -891,14 +891,14 @@ define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2]] = mul <8 x i32> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP3]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <8 x i32> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[BIN_RDX]]) @@ -916,7 +916,7 @@ define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP6]], [[SUM_07]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[MUL_LCSSA]] @@ -963,7 +963,7 @@ define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture re ; CHECK-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_LOAD1]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -986,7 +986,7 @@ define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture re ; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP9]], [[SUM]] ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[MUL_LCSSA]] @@ -1036,7 +1036,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -1084,7 +1084,7 @@ define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh" ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc half @llvm.vector.reduce.fadd.nxv8f16(half 0xH8000, [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -1128,18 +1128,18 @@ define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvf ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ , %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ splat (half 0xH8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i32 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x half>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x half>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD]], <16 x half> [[WIDE_LOAD3]], <16 x half> [[VEC_PHI]]) ; CHECK-NEXT: [[TMP5]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD2]], <16 x half> [[WIDE_LOAD4]], <16 x half> [[VEC_PHI1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x half> [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[BIN_RDX]]) @@ -1159,7 +1159,7 @@ define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvf ; CHECK-NEXT: [[MULADD]] = tail call reassoc half @llvm.fmuladd.f16(half [[TMP8]], half [[TMP9]], half [[SUM_07]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi half [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret half [[MULADD_LCSSA]] @@ -1198,18 +1198,18 @@ define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ , %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ splat (bfloat 0xR8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds bfloat, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds bfloat, ptr [[TMP2]], i32 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds bfloat, ptr [[TMP2]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x bfloat>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x bfloat>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD]], <16 x bfloat> [[WIDE_LOAD3]], <16 x bfloat> [[VEC_PHI]]) ; CHECK-NEXT: [[TMP5]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD2]], <16 x bfloat> [[WIDE_LOAD4]], <16 x bfloat> [[VEC_PHI1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x bfloat> [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = call reassoc bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> [[BIN_RDX]]) @@ -1229,7 +1229,7 @@ define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin ; CHECK-NEXT: [[MULADD]] = tail call reassoc bfloat @llvm.fmuladd.bf16(bfloat [[TMP8]], bfloat [[TMP9]], bfloat [[SUM_07]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi bfloat [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret bfloat [[MULADD_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll index b9a4e97cd9f24..cc1b2380bc532 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll @@ -108,7 +108,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD2]]) @@ -117,7 +117,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP5]] = mul i32 [[VEC_PHI1]], [[TMP4]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 8 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP5]], [[MUL]] ; IF-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_RND_UP]], [[N_VEC]] @@ -134,7 +134,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[MUL1]] = mul nsw i32 [[TMP0]], [[RDX1]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL1]], [[FOR_BODY1]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]] @@ -152,7 +152,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -219,7 +219,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -303,7 +303,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -387,7 +387,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -471,7 +471,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -557,7 +557,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -643,7 +643,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -729,7 +729,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -815,7 +815,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -895,7 +895,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD2]]) @@ -904,7 +904,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP5]] = fmul reassoc float [[VEC_PHI1]], [[TMP4]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 8 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[BIN_RDX:%.*]] = fmul reassoc float [[TMP5]], [[MUL]] ; IF-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_RND_UP]], [[N_VEC]] @@ -921,7 +921,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MUL1]] = fmul reassoc float [[TMP0]], [[RDX1]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL1]], [[FOR_BODY1]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MUL_LCSSA]] @@ -939,7 +939,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; NO-VP-NEXT: [[TMP6:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]]) @@ -1007,7 +1007,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -1095,7 +1095,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -1179,14 +1179,14 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI2:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI2]], <8 x float> [[WIDE_LOAD3]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP3]], <8 x float> [[TMP4]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]]) @@ -1204,7 +1204,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MIN_LCSSA]] @@ -1224,7 +1224,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4 ; NO-VP-NEXT: [[TMP6]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1287,14 +1287,14 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI2:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI2]], <8 x float> [[WIDE_LOAD3]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP3]], <8 x float> [[TMP4]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]]) @@ -1312,7 +1312,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MAX_LCSSA]] @@ -1332,7 +1332,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4 ; NO-VP-NEXT: [[TMP6]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1401,7 +1401,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -1492,7 +1492,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] @@ -1584,7 +1584,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll index 7179e7dc48c8d..d1a2303e35e68 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll @@ -110,14 +110,14 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP9]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 +; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP4]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP6:%.*]] = mul <8 x i32> [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP6]]) @@ -135,7 +135,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[MUL]] = mul nsw i32 [[TMP0]], [[RDX]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY1]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]] @@ -154,7 +154,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 8 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 ; NO-VP-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_LOAD]], [[VEC_PHI]] @@ -221,7 +221,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -308,7 +308,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -395,7 +395,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -484,7 +484,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -577,7 +577,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -670,7 +670,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -763,7 +763,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -854,7 +854,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -937,14 +937,14 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP9]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 8 +; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[TMP5]] = fmul reassoc <8 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP4]] = fmul reassoc <8 x float> [[WIDE_LOAD2]], [[VEC_PHI1]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP6:%.*]] = fmul reassoc <8 x float> [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> [[TMP6]]) @@ -962,7 +962,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MUL]] = fmul reassoc float [[TMP0]], [[RDX]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY1]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MUL_LCSSA]] @@ -981,7 +981,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 ; NO-VP-NEXT: [[TMP5]] = fmul reassoc <8 x float> [[WIDE_LOAD]], [[VEC_PHI]] @@ -1050,7 +1050,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -1143,7 +1143,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -1230,14 +1230,14 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]]) ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP3]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]]) @@ -1255,7 +1255,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MIN_LCSSA]] @@ -1275,7 +1275,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; NO-VP-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1338,14 +1338,14 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]]) ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP3]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]]) @@ -1363,7 +1363,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MAX_LCSSA]] @@ -1383,7 +1383,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; NO-VP-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1452,7 +1452,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP20:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP17]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -1544,7 +1544,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] @@ -1636,7 +1636,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 7b0ac78fb365c..13990000585ea 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -331,20 +331,20 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] ; NO-VP-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[INDEX]] ; NO-VP-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0 -; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 -15 +; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 +; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -15 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; NO-VP-NEXT: [[REVERSE:%.*]] = shufflevector <16 x i8> [[WIDE_LOAD]], <16 x i8> poison, <16 x i32> ; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], <16 x i8> [[REVERSE]] ; NO-VP-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> align 1 [[TMP3]], <16 x i1> splat (i1 true), <16 x i8> poison) ; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0 -; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 -15 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 0 +; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i64 -15 ; NO-VP-NEXT: [[REVERSE1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_GATHER]], <16 x i8> poison, <16 x i32> ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP6]], align 1 ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0 -; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 -15 +; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 0 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -15 ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP9]], align 1 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 0375f0a8fd132..03377f10c2283 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -44,7 +44,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -160,7 +160,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -294,7 +294,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer ; FIXEDLEN-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER1]], <4 x i64> zeroinitializer ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -417,7 +417,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -517,7 +517,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 +; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -630,7 +630,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 7 ; FIXEDLEN-NEXT: store i64 [[TMP4]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 4 +; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -756,7 +756,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> align 8 [[BROADCAST_SPLAT2]], <4 x i1> [[TMP1]]) ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> align 8 [[BROADCAST_SPLAT2]], <4 x i1> [[TMP2]]) ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -878,7 +878,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 1 ; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 +; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 0287645d9d7f9..94ebf01509ec2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -126,9 +126,9 @@ define void @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwi ; CHECK-NEXT: [[TMP19:%.*]] = sitofp <2 x i64> [[TMP10]] to <2 x float> ; CHECK-NEXT: [[TMP20:%.*]] = sitofp <2 x i64> [[TMP11]] to <2 x float> ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 4 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 6 ; CHECK-NEXT: store <2 x float> [[TMP12]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP18]], ptr [[TMP15]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP19]], ptr [[TMP16]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index b3c45a565a8fe..c70a3aa249919 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -385,7 +385,7 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[SRC_3]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP25]], align 8, !alias.scope [[META9:![0-9]+]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP27:%.*]] = and <2 x i1> [[TMP23]], [[TMP26]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll index 3165422dcc539..d19ae728cc913 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll @@ -23,8 +23,8 @@ define i1 @fn(ptr %nno) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i32, ptr [[NNO]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP6]], <4 x i1> [[REVERSE]], <4 x i32> poison) ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 6e940ee58fabe..a1b92e0658bd3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -161,9 +161,9 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16 -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 32 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 48 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 16 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 32 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 48 ; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP10]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP21]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 12b8d1e15b523..84579d97b38e2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -26,7 +26,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD1]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -34,7 +34,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i8> [[WIDE_LOAD1]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -119,7 +119,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR4:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT3]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD5]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -135,7 +135,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i8> [[TMP15]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i8> [[TMP16]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i32 16 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP19]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP18]], ptr [[TMP22]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 39217e51ab117..41249c595f9eb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -41,9 +41,9 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[STEP_ADD]], splat (float 4.000000e+00) ; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[STEP_ADD2]], splat (float 4.000000e+00) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 16 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 24 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 16 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 24 ; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], ptr [[TMP1]], align 4 ; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], ptr [[TMP2]], align 4 ; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD2]], ptr [[TMP3]], align 4 @@ -208,9 +208,9 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[STEP_ADD]], splat (double 1.200000e+01) ; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[STEP_ADD_2]], splat (double 1.200000e+01) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP1]], i32 4 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[TMP1]], i32 8 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP1]], i32 12 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP1]], i64 4 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 8 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP1]], i64 12 ; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8 ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8 ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8 @@ -326,9 +326,9 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 16 -; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 24 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 16 +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 24 ; AUTO_VEC-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; AUTO_VEC-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; AUTO_VEC-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll index a0637ceb53cf2..137c09b653f2c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll @@ -22,17 +22,17 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -99,17 +99,17 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -176,17 +176,17 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -253,17 +253,17 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index 877fcd4d638eb..34a99b07ee93e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -75,7 +75,7 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] ; SSE-NEXT: [[VEC_PHI1:%.*]] = phi <2 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ] ; SSE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[ARR:%.*]], i32 [[INDEX]] -; SSE-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i32 2 +; SSE-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i64 2 ; SSE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; SSE-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; SSE-NEXT: [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], splat (double 4.200000e+01) @@ -106,9 +106,9 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: [[VEC_PHI2:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI8:%.*]], [[VECTOR_BODY]] ] ; AVX-NEXT: [[VEC_PHI3:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI9:%.*]], [[VECTOR_BODY]] ] ; AVX-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[ARR:%.*]], i32 [[INDEX]] -; AVX-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP4]], i32 4 -; AVX-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP4]], i32 8 -; AVX-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP4]], i32 12 +; AVX-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP4]], i64 4 +; AVX-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP4]], i64 8 +; AVX-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP4]], i64 12 ; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP4]], align 8 ; AVX-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x double>, ptr [[TMP9]], align 8 ; AVX-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x double>, ptr [[TMP10]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 4028dd87e34b3..04bff3c393f62 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -125,13 +125,13 @@ define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP4]], align 2, !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]] -; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP7]], align 2, !alias.scope [[META6]], !noalias [[META9]] +; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP3]], align 2, !alias.scope [[META6]], !noalias [[META9]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP8]], i64 4 ; CHECK-NEXT: store <4 x i32> [[VEC_IND3]], ptr [[TMP8]], align 4, !alias.scope [[META9]] -; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP11]], align 4, !alias.scope [[META9]] +; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP5]], align 4, !alias.scope [[META9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], splat (i32 4) @@ -192,7 +192,7 @@ define void @truncated_ivs_with_wide_and_scalar_uses(i1 %c, ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> splat (i16 10) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i32 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i64 8 ; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP3]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP6]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -477,9 +477,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i64 16 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i64 24 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr align 4 [[TMP16]], <8 x i1> [[TMP8]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr align 4 [[TMP25]], <8 x i1> [[TMP9]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr align 4 [[TMP26]], <8 x i1> [[TMP10]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll index 61f07eff768c1..d25d9f81de985 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll @@ -27,7 +27,7 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr [[TMP5]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -99,7 +99,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP6]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP10]], ptr [[TMP9]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll index d75fd0e0023f7..ad6dfb054b726 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -32,9 +32,9 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 16 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 32 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 48 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 32 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i16>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i16>, ptr [[TMP2]], align 2 @@ -43,9 +43,9 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD4]], <16 x i16> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD5]], <16 x i16> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD6]], <16 x i16> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 16 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 32 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 48 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 16 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 32 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 48 ; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[NEXT_GEP3]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP9]], align 2 @@ -160,9 +160,9 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 32 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 96 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 96 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <32 x i8>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <32 x i8>, ptr [[TMP2]], align 2 @@ -171,9 +171,9 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD3]], <32 x i8> [[WIDE_LOAD3]], <32 x i8> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD4]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD5]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 32 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 64 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 96 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 32 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 96 ; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr [[NEXT_GEP2]], align 2 ; CHECK-NEXT: store <32 x i8> [[TMP5]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store <32 x i8> [[TMP6]], ptr [[TMP9]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index b710236c026d2..751e885733f17 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -38,9 +38,9 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 16 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 32 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 48 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP2]], align 8, !alias.scope [[META0]] ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, ptr [[TMP3]], align 8, !alias.scope [[META0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll index bcb6b5c422343..a247285317a1e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll @@ -18,9 +18,9 @@ define i64 @test_pr98660(ptr %dst, i64 %N) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i32 8 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i32 16 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 24 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP14]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll index 6e3b2a5390948..ea3ec99cf46e1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -193,17 +193,17 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 64 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 64 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 64 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 12 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 12 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], ptr [[TMP6]], align 64 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD1]], ptr [[TMP8]], align 64 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD2]], ptr [[TMP9]], align 64 diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 6605338771c47..78363e13595cb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -39,9 +39,9 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP8]], i32 8 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP8]], i32 12 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP8]], i64 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP8]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4 @@ -171,9 +171,9 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP64]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP69]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 @@ -718,9 +718,9 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i32 4 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i32 8 -; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i32 12 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP65]], <4 x i1> [[TMP40]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP48]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP71]], <4 x i1> [[TMP56]], <4 x i32> poison) @@ -877,9 +877,9 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP64]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP69]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 @@ -1231,9 +1231,9 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1362,9 +1362,9 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1493,9 +1493,9 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1633,9 +1633,9 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i32 4 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i32 8 -; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i32 12 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP65]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP71]], align 4 @@ -1793,9 +1793,9 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1925,9 +1925,9 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -2067,9 +2067,9 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll index 2c172b2aecd16..1d0906902ad62 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll @@ -35,9 +35,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i64 16 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i64 24 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr align 4 [[TMP16]], <8 x i1> [[TMP8]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr align 4 [[TMP25]], <8 x i1> [[TMP9]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr align 4 [[TMP26]], <8 x i1> [[TMP10]]) @@ -199,7 +199,7 @@ define void @test_scalar_cost_single_store_loop_varying_cond(ptr %dst, ptr noali ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i32> [[WIDE_VEC4]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], splat (i32 123) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], splat (i32 123) -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[NEXT_GEP]], <4 x i1> [[TMP8]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP11]], <4 x i1> [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 932153a23bdbd..e4977ee642b09 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -72,9 +72,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 24 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 24 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 @@ -84,9 +84,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 8 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i32 24 +; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i64 8 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i64 16 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i64 24 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 4 [[TMP12]], <8 x i1> [[TMP8]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 4 [[TMP14]], <8 x i1> [[TMP9]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 4 [[TMP15]], <8 x i1> [[TMP10]], <8 x i32> poison) @@ -96,9 +96,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP19:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX2-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i32 8 -; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i32 16 -; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i32 24 +; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i64 8 +; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 16 +; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i64 24 ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP17]], ptr align 4 [[TMP21]], <8 x i1> [[TMP8]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP18]], ptr align 4 [[TMP23]], <8 x i1> [[TMP9]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP19]], ptr align 4 [[TMP24]], <8 x i1> [[TMP10]]) @@ -151,9 +151,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 32 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 48 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 32 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 48 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 @@ -163,9 +163,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i32 32 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i32 48 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i64 16 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i64 32 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i64 48 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 4 [[TMP12]], <16 x i1> [[TMP8]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 4 [[TMP14]], <16 x i1> [[TMP9]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 4 [[TMP15]], <16 x i1> [[TMP10]], <16 x i32> poison) @@ -175,9 +175,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP19:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX512-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX512-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i32 16 -; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i32 32 -; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i32 48 +; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i64 16 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 32 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i64 48 ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP17]], ptr align 4 [[TMP21]], <16 x i1> [[TMP8]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP18]], ptr align 4 [[TMP23]], <16 x i1> [[TMP9]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP19]], ptr align 4 [[TMP24]], <16 x i1> [[TMP10]]) @@ -293,9 +293,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 16 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 24 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 16 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 24 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP3]], align 4 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP6]], align 4 @@ -305,9 +305,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 8 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 24 +; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 8 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 16 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 24 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 4 [[TMP12]], <8 x i1> [[TMP8]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 4 [[TMP14]], <8 x i1> [[TMP9]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 4 [[TMP15]], <8 x i1> [[TMP10]], <8 x i32> poison) @@ -317,9 +317,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2-NEXT: [[TMP19:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX2-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 8 -; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 16 -; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 24 +; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 8 +; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 16 +; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 24 ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP17]], ptr addrspace(1) align 4 [[TMP21]], <8 x i1> [[TMP8]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP18]], ptr addrspace(1) align 4 [[TMP23]], <8 x i1> [[TMP9]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP19]], ptr addrspace(1) align 4 [[TMP24]], <8 x i1> [[TMP10]]) @@ -372,9 +372,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 32 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 48 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 32 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 48 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP3]], align 4 ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP6]], align 4 @@ -384,9 +384,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 32 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 48 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 16 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 32 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 48 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) align 4 [[TMP12]], <16 x i1> [[TMP8]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) align 4 [[TMP14]], <16 x i1> [[TMP9]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) align 4 [[TMP15]], <16 x i1> [[TMP10]], <16 x i32> poison) @@ -396,9 +396,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[TMP19:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX512-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX512-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 16 -; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 32 -; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 48 +; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 16 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 32 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 48 ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP17]], ptr addrspace(1) align 4 [[TMP21]], <16 x i1> [[TMP8]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP18]], ptr addrspace(1) align 4 [[TMP23]], <16 x i1> [[TMP9]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP19]], ptr addrspace(1) align 4 [[TMP24]], <16 x i1> [[TMP10]]) @@ -524,9 +524,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 24 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 24 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 @@ -536,9 +536,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 8 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i32 24 +; AVX2-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i64 8 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i64 16 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i64 24 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr align 4 [[TMP12]], <8 x i1> [[TMP8]], <8 x float> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr align 4 [[TMP14]], <8 x i1> [[TMP9]], <8 x float> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr align 4 [[TMP15]], <8 x i1> [[TMP10]], <8 x float> poison) @@ -552,9 +552,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP23:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD9]], [[TMP19]] ; AVX2-NEXT: [[TMP24:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD10]], [[TMP20]] ; AVX2-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i32 8 -; AVX2-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i32 16 -; AVX2-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i32 24 +; AVX2-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i64 8 +; AVX2-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i64 16 +; AVX2-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i64 24 ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP21]], ptr align 4 [[TMP25]], <8 x i1> [[TMP8]]) ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP22]], ptr align 4 [[TMP27]], <8 x i1> [[TMP9]]) ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP23]], ptr align 4 [[TMP28]], <8 x i1> [[TMP10]]) @@ -608,9 +608,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 32 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 48 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 32 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 48 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 @@ -620,9 +620,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i32 32 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i32 48 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i64 16 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i64 32 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i64 48 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr align 4 [[TMP12]], <16 x i1> [[TMP8]], <16 x float> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr align 4 [[TMP14]], <16 x i1> [[TMP9]], <16 x float> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr align 4 [[TMP15]], <16 x i1> [[TMP10]], <16 x float> poison) @@ -636,9 +636,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP23:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD9]], [[TMP19]] ; AVX512-NEXT: [[TMP24:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD10]], [[TMP20]] ; AVX512-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i32 16 -; AVX512-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i32 32 -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i32 48 +; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i64 16 +; AVX512-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i64 32 +; AVX512-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i64 48 ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP21]], ptr align 4 [[TMP25]], <16 x i1> [[TMP8]]) ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP22]], ptr align 4 [[TMP27]], <16 x i1> [[TMP9]]) ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP23]], ptr align 4 [[TMP28]], <16 x i1> [[TMP10]]) @@ -732,25 +732,25 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1: [[VECTOR_BODY]]: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META8:![0-9]+]] -; AVX1-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]] -; AVX1-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]] -; AVX1-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META8]] +; AVX1-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]] +; AVX1-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META8]] +; AVX1-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]] ; AVX1-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX1-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX1-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX1-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX1-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 -; AVX1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 -; AVX1-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 12 +; AVX1-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i64 4 +; AVX1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 8 +; AVX1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 12 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP10]], <4 x i1> [[TMP6]], <4 x double> poison), !alias.scope [[META11:![0-9]+]] ; AVX1-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP12]], <4 x i1> [[TMP7]], <4 x double> poison), !alias.scope [[META11]] ; AVX1-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP8]], <4 x double> poison), !alias.scope [[META11]] -; AVX1-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP14]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META11]] +; AVX1-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP11]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META11]] ; AVX1-NEXT: [[TMP15:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double> ; AVX1-NEXT: [[TMP16:%.*]] = sitofp <4 x i32> [[WIDE_LOAD6]] to <4 x double> ; AVX1-NEXT: [[TMP17:%.*]] = sitofp <4 x i32> [[WIDE_LOAD7]] to <4 x double> @@ -760,13 +760,13 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] ; AVX1-NEXT: [[TMP22:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] ; AVX1-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 -; AVX1-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 -; AVX1-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 12 +; AVX1-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 4 +; AVX1-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 8 +; AVX1-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 12 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP19]], ptr align 8 [[TMP23]], <4 x i1> [[TMP6]]), !alias.scope [[META13:![0-9]+]], !noalias [[META15:![0-9]+]] -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP25]], <4 x i1> [[TMP7]]), !alias.scope [[META13]], !noalias [[META15]] -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP26]], <4 x i1> [[TMP8]]), !alias.scope [[META13]], !noalias [[META15]] -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP27]], <4 x i1> [[TMP9]]), !alias.scope [[META13]], !noalias [[META15]] +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP24]], <4 x i1> [[TMP7]]), !alias.scope [[META13]], !noalias [[META15]] +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP25]], <4 x i1> [[TMP8]]), !alias.scope [[META13]], !noalias [[META15]] +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP26]], <4 x i1> [[TMP9]]), !alias.scope [[META13]], !noalias [[META15]] ; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; AVX1-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -795,25 +795,25 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META12:![0-9]+]] -; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] -; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] -; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META12]] +; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] +; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META12]] +; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] ; AVX2-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX2-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX2-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 -; AVX2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 12 +; AVX2-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i64 4 +; AVX2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 8 +; AVX2-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 12 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP10]], <4 x i1> [[TMP6]], <4 x double> poison), !alias.scope [[META15:![0-9]+]] ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP12]], <4 x i1> [[TMP7]], <4 x double> poison), !alias.scope [[META15]] ; AVX2-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP8]], <4 x double> poison), !alias.scope [[META15]] -; AVX2-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP14]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META15]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP11]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META15]] ; AVX2-NEXT: [[TMP15:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double> ; AVX2-NEXT: [[TMP16:%.*]] = sitofp <4 x i32> [[WIDE_LOAD6]] to <4 x double> ; AVX2-NEXT: [[TMP17:%.*]] = sitofp <4 x i32> [[WIDE_LOAD7]] to <4 x double> @@ -823,13 +823,13 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] ; AVX2-NEXT: [[TMP22:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] ; AVX2-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 -; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 -; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 12 +; AVX2-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 4 +; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 8 +; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 12 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP19]], ptr align 8 [[TMP23]], <4 x i1> [[TMP6]]), !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP25]], <4 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP26]], <4 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP27]], <4 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP24]], <4 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP25]], <4 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP26]], <4 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; AVX2-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -860,25 +860,25 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 16 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 24 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 16 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 24 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META12:![0-9]+]] -; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] -; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] -; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META12]] +; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] +; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META12]] +; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] ; AVX512-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX512-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX512-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 -; AVX512-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 16 -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 24 +; AVX512-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i64 8 +; AVX512-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 16 +; AVX512-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 24 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP10]], <8 x i1> [[TMP6]], <8 x double> poison), !alias.scope [[META15:![0-9]+]] ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP12]], <8 x i1> [[TMP7]], <8 x double> poison), !alias.scope [[META15]] ; AVX512-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP13]], <8 x i1> [[TMP8]], <8 x double> poison), !alias.scope [[META15]] -; AVX512-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP14]], <8 x i1> [[TMP9]], <8 x double> poison), !alias.scope [[META15]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP11]], <8 x i1> [[TMP9]], <8 x double> poison), !alias.scope [[META15]] ; AVX512-NEXT: [[TMP15:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x double> ; AVX512-NEXT: [[TMP16:%.*]] = sitofp <8 x i32> [[WIDE_LOAD6]] to <8 x double> ; AVX512-NEXT: [[TMP17:%.*]] = sitofp <8 x i32> [[WIDE_LOAD7]] to <8 x double> @@ -888,13 +888,13 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP21:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] ; AVX512-NEXT: [[TMP22:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] ; AVX512-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 -; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 16 -; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 24 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 8 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 16 +; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 24 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP19]], ptr align 8 [[TMP23]], <8 x i1> [[TMP6]]), !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP20]], ptr align 8 [[TMP25]], <8 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP21]], ptr align 8 [[TMP26]], <8 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP22]], ptr align 8 [[TMP27]], <8 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP20]], ptr align 8 [[TMP24]], <8 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP21]], ptr align 8 [[TMP25]], <8 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP22]], ptr align 8 [[TMP26]], <8 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 ; AVX512-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -1117,68 +1117,68 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -3 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -4 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -8 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -12 -; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 -; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META22:![0-9]+]] +; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -3 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -4 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -3 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -12 +; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3 +; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META22:![0-9]+]] ; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD6]], <4 x i32> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD8]], <4 x i32> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD10]], <4 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer ; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[REVERSE7]], zeroinitializer ; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[REVERSE9]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i32> [[REVERSE11]], zeroinitializer ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP15]], i32 -3 -; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i32 -4 -; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP17]], i32 -3 -; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP14]], i32 -8 -; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 -3 -; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP14]], i32 -12 -; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP21]], i32 -3 +; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -3 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -4 +; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -3 +; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8 +; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -3 +; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -12 +; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -3 ; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP16]], <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope [[META25:![0-9]+]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP15]], <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope [[META25:![0-9]+]] ; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP11]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP18]], <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP17]], <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP12]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP20]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP19]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP22]], <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP21]], <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 -; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -3 -; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP27]], i32 -4 -; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -3 -; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP27]], i32 -8 -; AVX2-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP32]], i32 -3 -; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP27]], i32 -12 -; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 -3 +; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0 +; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -3 +; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -4 +; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -3 +; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8 +; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -3 +; AVX2-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -12 +; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -3 ; AVX2-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP29]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP28]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]] ; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP31]], <4 x i1> [[REVERSE14]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP30]], <4 x i1> [[REVERSE14]]), !alias.scope [[META27]], !noalias [[META29]] ; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE28]], ptr align 8 [[TMP33]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE28]], ptr align 8 [[TMP32]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias [[META29]] ; AVX2-NEXT: [[REVERSE30:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE30]], ptr align 8 [[TMP35]], <4 x i1> [[REVERSE20]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE30]], ptr align 8 [[TMP34]], <4 x i1> [[REVERSE20]]), !alias.scope [[META27]], !noalias [[META29]] ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; AVX2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -1208,68 +1208,68 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -7 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -8 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -7 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -16 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -7 -; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -24 -; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -7 -; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META34:![0-9]+]] +; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -7 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -7 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -7 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -24 +; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -7 +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META34:![0-9]+]] ; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE7:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD6]], <8 x i32> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE9:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD8]], <8 x i32> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE11:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD10]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: [[TMP10:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer ; AVX512-NEXT: [[TMP11:%.*]] = icmp sgt <8 x i32> [[REVERSE7]], zeroinitializer ; AVX512-NEXT: [[TMP12:%.*]] = icmp sgt <8 x i32> [[REVERSE9]], zeroinitializer ; AVX512-NEXT: [[TMP13:%.*]] = icmp sgt <8 x i32> [[REVERSE11]], zeroinitializer ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP15]], i32 -7 -; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i32 -8 -; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP17]], i32 -7 -; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP14]], i32 -16 -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 -7 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP14]], i32 -24 -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP21]], i32 -7 +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -7 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8 +; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -7 +; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -16 +; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -7 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -24 +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -7 ; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP16]], <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope [[META37:![0-9]+]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP15]], <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope [[META37:![0-9]+]] ; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP18]], <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP17]], <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD15]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP20]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP19]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD18]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP13]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP22]], <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP21]], <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -7 -; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP27]], i32 -8 -; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -7 -; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP27]], i32 -16 -; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP32]], i32 -7 -; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP27]], i32 -24 -; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 -7 +; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0 +; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -7 +; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8 +; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -7 +; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -16 +; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -7 +; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -24 +; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -7 ; AVX512-NEXT: [[REVERSE24:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP29]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP28]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]] ; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP31]], <8 x i1> [[REVERSE14]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP30]], <8 x i1> [[REVERSE14]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[TMP25]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE28]], ptr align 8 [[TMP33]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE28]], ptr align 8 [[TMP32]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[REVERSE30:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE30]], ptr align 8 [[TMP35]], <8 x i1> [[REVERSE20]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE30]], ptr align 8 [[TMP34]], <8 x i1> [[REVERSE20]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; AVX512-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] @@ -1332,9 +1332,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1: [[VECTOR_BODY]]: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -1348,9 +1348,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer ; AVX1-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer ; AVX1-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 4 -; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 8 -; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 12 +; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 4 +; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 8 +; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 12 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP14]], <4 x ptr> poison) ; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP15]], <4 x ptr> poison) ; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP21]], <4 x i1> [[TMP16]], <4 x ptr> poison) @@ -1364,9 +1364,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 -; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 +; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4 +; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]]) ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]]) ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]]) @@ -1424,9 +1424,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -1440,9 +1440,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer ; AVX2-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 -; AVX2-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 -; AVX2-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12 +; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 4 +; AVX2-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8 +; AVX2-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 12 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP18]], <4 x i1> [[TMP14]], <4 x ptr> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP15]], <4 x ptr> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP21]], <4 x i1> [[TMP16]], <4 x ptr> poison) @@ -1456,9 +1456,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 -; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 +; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4 +; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]]) ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]]) ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]]) @@ -1516,9 +1516,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 24 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 @@ -1532,9 +1532,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP16:%.*]] = icmp ne <8 x i8> [[TMP8]], zeroinitializer ; AVX512-NEXT: [[TMP17:%.*]] = icmp ne <8 x i8> [[TMP9]], zeroinitializer ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 16 -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 24 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8 +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 16 +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 24 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP18]], <8 x i1> [[TMP14]], <8 x ptr> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP20]], <8 x i1> [[TMP15]], <8 x ptr> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP21]], <8 x i1> [[TMP16]], <8 x ptr> poison) @@ -1548,9 +1548,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP17]], <8 x i1> [[TMP30]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 -; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 +; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 16 +; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 24 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <8 x i1> [[TMP31]]) ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <8 x i1> [[TMP32]]) ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <8 x i1> [[TMP33]]) @@ -1653,9 +1653,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1: [[VECTOR_BODY]]: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -1669,9 +1669,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer ; AVX1-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer ; AVX1-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 -; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 -; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12 +; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 4 +; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8 +; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 12 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP18]], <4 x i1> [[TMP14]], <4 x ptr> poison) ; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP15]], <4 x ptr> poison) ; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP21]], <4 x i1> [[TMP16]], <4 x ptr> poison) @@ -1685,9 +1685,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 -; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 +; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4 +; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]]) ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]]) ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]]) @@ -1745,9 +1745,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -1761,9 +1761,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer ; AVX2-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 4 -; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 8 -; AVX2-NEXT: [[TMP23:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 12 +; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 4 +; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 8 +; AVX2-NEXT: [[TMP23:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 12 ; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP17]], <4 x ptr> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP19]], <4 x i1> [[TMP15]], <4 x ptr> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP16]], <4 x ptr> poison) @@ -1777,9 +1777,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP21]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 -; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 +; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4 +; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]]) ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]]) ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]]) @@ -1837,9 +1837,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 24 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 @@ -1853,9 +1853,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP16:%.*]] = icmp ne <8 x i8> [[TMP8]], zeroinitializer ; AVX512-NEXT: [[TMP17:%.*]] = icmp ne <8 x i8> [[TMP9]], zeroinitializer ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 16 -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 24 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8 +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 16 +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 24 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP18]], <8 x i1> [[TMP14]], <8 x ptr> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP20]], <8 x i1> [[TMP15]], <8 x ptr> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP21]], <8 x i1> [[TMP16]], <8 x ptr> poison) @@ -1869,9 +1869,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP17]], <8 x i1> [[TMP30]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 -; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 +; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 16 +; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 24 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <8 x i1> [[TMP31]]) ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <8 x i1> [[TMP32]]) ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <8 x i1> [[TMP33]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll index e23f8a9b63ef0..d514ab6bc72b7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -1186,13 +1186,13 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; O1VEC2: vector.body: ; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; O1VEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDEX]] -; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 4 +; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 4 ; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; O1VEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; O1VEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; O1VEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]] ; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDEX]] -; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 4 +; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i64 4 ; O1VEC2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4 ; O1VEC2-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4 ; O1VEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -1214,13 +1214,13 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; OzVEC2: vector.body: ; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; OzVEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDEX]] -; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 4 +; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 4 ; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; OzVEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; OzVEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; OzVEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]] ; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDEX]] -; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 4 +; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i64 4 ; OzVEC2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4 ; OzVEC2-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4 ; OzVEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll index de6418066dea0..2809a77b36f1a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll @@ -30,8 +30,8 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP27]], i32 -7 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP26]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP12]], i64 -7 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP28]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i8> [[WIDE_LOAD]], <8 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP29]] = xor <8 x i8> [[REVERSE]], [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll index 31269b1b8c221..85d77eaadc632 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll @@ -35,17 +35,17 @@ define void @foo(ptr addrspace(1) align 8 dereferenceable_or_null(16), ptr addrs ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT12]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i32 8 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i32 12 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP5]], align 8, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP6]], align 8, !alias.scope [[META0]] ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP7]], align 8, !alias.scope [[META0]] ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP8]], align 8, !alias.scope [[META0]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT10]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i32 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i32 8 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i32 12 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 12 ; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD]], ptr addrspace(1) [[TMP9]], align 8, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD4]], ptr addrspace(1) [[TMP10]], align 8, !alias.scope [[META3]], !noalias [[META0]] ; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD5]], ptr addrspace(1) [[TMP11]], align 8, !alias.scope [[META3]], !noalias [[META0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index 3c618d71fc974..9217c905945ac 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -67,7 +67,7 @@ define i32 @main(ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[DOTPROMOTED]], [[INDEX]] ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP20]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP22]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP25]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll index 737bcf35fbd2c..38db41271d1f6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll @@ -124,7 +124,7 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; SSE41-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP22]], [[TMP16]] ; SSE41-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[TMP23]], [[TMP17]] ; SSE41-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[D1:%.*]], i64 [[INDEX]] -; SSE41-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 4 +; SSE41-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 4 ; SSE41-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4 ; SSE41-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP29]], align 4 ; SSE41-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -250,9 +250,9 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; AVX1-NEXT: [[TMP69:%.*]] = add nsw <4 x i32> [[TMP67]], [[TMP46]] ; AVX1-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[TMP68]], [[TMP47]] ; AVX1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[D1:%.*]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 4 -; AVX1-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 8 -; AVX1-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 12 +; AVX1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 4 +; AVX1-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 8 +; AVX1-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 12 ; AVX1-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4 ; AVX1-NEXT: store <4 x i32> [[TMP20]], ptr [[TMP26]], align 4 ; AVX1-NEXT: store <4 x i32> [[TMP69]], ptr [[TMP71]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 08855fe9ecba5..c756a54ec6d2b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -30,8 +30,8 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 8 [[TMP8]], <4 x i1> [[REVERSE]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll index 2aceb279d47db..5a396f88b1a64 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll @@ -76,7 +76,7 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -214,7 +214,7 @@ define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -337,7 +337,7 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -527,7 +527,7 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -687,7 +687,7 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -836,7 +836,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -1014,7 +1014,7 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -1167,7 +1167,7 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -1319,7 +1319,7 @@ define void @large_number_of_cases(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 1) diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll index 52e90e4475208..3afdf947081b6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -60,7 +60,7 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] @@ -111,7 +111,7 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6]] = fadd reassoc <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] @@ -162,7 +162,7 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array) ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6]] = fadd reassoc contract <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] @@ -220,7 +220,7 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf nsz oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] @@ -301,7 +301,7 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index 602a3921eb34c..da48f984cb329 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -59,13 +59,13 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8 -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16 -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] @@ -290,13 +290,13 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] -; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8 -; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16 -; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24 +; MAX-BW-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 +; MAX-BW-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 +; MAX-BW-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 ; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] -; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll index 8081c0e17f865..692ab3db0aa42 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll @@ -25,9 +25,9 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @inc, align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[A]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP5]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index fda944e072d4a..714d01315e507 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -29,29 +29,29 @@ define void @vectorized(ptr noalias nocapture %A, ptr noalias nocapture readonly ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 12 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]] ; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x float> [[TMP14]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x float> [[TMP14]], ptr [[TMP7]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll index c8e3766aa936e..a792d2463e647 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll @@ -56,17 +56,17 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 16 -; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 32 -; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 48 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 16 +; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 32 +; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 48 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i32>, ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i32>, ptr [[TMP10]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i32>, ptr [[TMP11]], align 4 ; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 16 -; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 32 -; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 48 +; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 16 +; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 32 +; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 48 ; NO-VP-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP12]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP17]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP18]], align 4 @@ -76,9 +76,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: [[TMP22:%.*]] = add nsw <16 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]] ; NO-VP-NEXT: [[TMP23:%.*]] = add nsw <16 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]] ; NO-VP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 16 -; NO-VP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 32 -; NO-VP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 48 +; NO-VP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 16 +; NO-VP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 32 +; NO-VP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 48 ; NO-VP-NEXT: store <16 x i32> [[TMP20]], ptr [[TMP24]], align 4 ; NO-VP-NEXT: store <16 x i32> [[TMP21]], ptr [[TMP29]], align 4 ; NO-VP-NEXT: store <16 x i32> [[TMP22]], ptr [[TMP30]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index 8184cad22ae8b..26268f1ff4e94 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -18,9 +18,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 8 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 @@ -36,9 +36,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], splat (i64 128) ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP28]], i32 4 -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP28]], i32 8 -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP28]], i32 12 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP28]], i64 4 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP28]], i64 8 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP28]], i64 12 ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP12]], ptr align 4 [[TMP28]], <4 x i1> [[TMP16]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP13]], ptr align 4 [[TMP33]], <4 x i1> [[TMP17]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP14]], ptr align 4 [[TMP34]], <4 x i1> [[TMP18]]) @@ -88,9 +88,9 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 8 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 @@ -102,9 +102,9 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], splat (i64 128) ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP15]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP24]], i32 4 -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP24]], i32 8 -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP24]], i32 12 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP24]], i64 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP24]], i64 8 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP24]], i64 12 ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD]], ptr align 4 [[TMP24]], <4 x i1> [[TMP16]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD1]], ptr align 4 [[TMP29]], <4 x i1> [[TMP17]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD2]], ptr align 4 [[TMP30]], <4 x i1> [[TMP18]]) diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll index a9a0b33f542af..eddd5f9ddc584 100644 --- a/llvm/test/Transforms/LoopVectorize/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/assume.ll @@ -11,7 +11,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+02) @@ -27,7 +27,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP9]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -80,13 +80,13 @@ define void @test2(ptr noalias %a, ptr noalias %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 2 ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -151,13 +151,13 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x float> splat (float 2.300000e+01), <2 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP2]], <2 x float> splat (float 2.300000e+01), <2 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[PREDPHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[PREDPHI1]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 2 ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 1fe3962dfd072..6c63b823b7666 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -130,8 +130,8 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[REVERSE]] @@ -177,8 +177,8 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) { ; INTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; INTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; INTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; INTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 ; INTER-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8 ; INTER-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; INTER-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[REVERSE]] diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll index fb45745eff1cb..4737a56df2735 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll @@ -19,7 +19,7 @@ define i8 @preserve_flags_when_cloning_trunc(i8 %start, ptr noalias %src, ptr no ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP4]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP5]], align 2 ; CHECK-NEXT: [[TMP6]] = mul <4 x i8> [[VEC_PHI]], splat (i8 3) diff --git a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll index 5d92c127aff93..901652537a5c5 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll @@ -16,19 +16,19 @@ define void @cse_replicate_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP8]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP8]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP8]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i64 4 ; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP3]], align 4 ; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD1]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[C]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP5]], i64 4 ; CHECK-NEXT: store <4 x i16> [[WIDE_LOAD2]], ptr [[TMP5]], align 2 ; CHECK-NEXT: store <4 x i16> [[WIDE_LOAD3]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -79,11 +79,11 @@ define void @cse_wide_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[STEP_ADD]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8 ; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX1]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP6]], i64 4 ; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index 02e1d0e9e7004..6e5213568c735 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -25,7 +25,7 @@ define i64 @dead_instructions_01(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] @@ -133,13 +133,13 @@ define void @dead_load_and_vector_pointer(ptr %a, ptr %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 8, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP5]], align 8, !alias.scope [[META5]], !noalias [[META8]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META5]], !noalias [[META8]] ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP2]], align 4, !alias.scope [[META5]], !noalias [[META8]] -; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP5]], align 4, !alias.scope [[META5]], !noalias [[META8]] +; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP1]], align 4, !alias.scope [[META5]], !noalias [[META8]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll index 274bd043cd86b..c23d28cdd0f3a 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -15,8 +15,8 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 4 ; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP3]], align 1 ; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP7]], align 1 ; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP8]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll b/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll index 4af9f4a13b62b..50e55f6051485 100644 --- a/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll +++ b/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll @@ -26,7 +26,7 @@ define void @test(ptr %dst) personality ptr null { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP4]], align 8 ; CHECK-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll b/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll index 22226a711bcf0..5edd83bd1e0d1 100644 --- a/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll @@ -19,8 +19,8 @@ define float @fmaxnum(ptr %src, i64 %n) { ; IC3-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC3-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; IC3-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]] -; IC3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 4 -; IC3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 8 +; IC3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 4 +; IC3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 8 ; IC3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; IC3-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IC3-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 @@ -71,9 +71,9 @@ define float @fmaxnum(ptr %src, i64 %n) { ; IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]] -; IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 4 -; IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 8 -; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 12 +; IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 4 +; IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 8 +; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 12 ; IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 @@ -129,10 +129,10 @@ define float @fmaxnum(ptr %src, i64 %n) { ; IC5-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; IC5-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; IC5-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]] -; IC5-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 4 -; IC5-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 8 -; IC5-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 12 -; IC5-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 16 +; IC5-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 4 +; IC5-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 8 +; IC5-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 12 +; IC5-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 16 ; IC5-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; IC5-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IC5-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index eca39e6f0b6ba..cf2e7ccd1b2f0 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -98,7 +98,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i64 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP3]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -160,7 +160,7 @@ define void @sink_dead_inst(ptr %a) { ; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i16> [[TMP5]], splat (i16 10) ; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[A]], i16 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP9]], i32 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP9]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP7]], ptr [[TMP9]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index cebd52fa7f866..063f47ce2b32d 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -33,7 +33,7 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i32>, ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> @@ -41,7 +41,7 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP9]] -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP10]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -231,7 +231,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2]] = load <4 x i32>, ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> @@ -485,7 +485,7 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP6]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> @@ -499,7 +499,7 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP9]], [[TMP13]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = fsub fast <4 x double> [[TMP10]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP15]], ptr [[TMP17]], align 8 ; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP16]], ptr [[TMP19]], align 8 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -1700,7 +1700,7 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP4]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> @@ -1712,7 +1712,7 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP7]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -1915,7 +1915,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP5]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP6]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP8]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP18]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP9]], align 2 @@ -1943,7 +1943,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]] ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]] ; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] -; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP41]], ptr [[TMP43]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP45]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -2146,7 +2146,7 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP4]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> @@ -2160,7 +2160,7 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP11]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP12]] ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP17]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -2449,7 +2449,7 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10) ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], splat (i16 10) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP10]], align 2 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP12]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -3218,7 +3218,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll index 0745f286b2608..0d9d28d079b92 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll @@ -53,7 +53,7 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) @@ -127,10 +127,10 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV]] ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC_0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[GEP_SRC_1]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index ac767c68e0b25..87942911e915f 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -21,32 +21,20 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META3]] +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP18]], splat (i32 5) +; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5) ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -56,48 +44,30 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; CHECK: [[PRED_STORE_IF8]]: +; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] +; CHECK: [[PRED_STORE_IF6]]: ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 ; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_CONTINUE9]]: -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE9]] ], [ [[TMP29]], %[[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] -; CHECK: [[PRED_LOAD_IF12]]: -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[TMP33]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]] -; CHECK: [[PRED_LOAD_CONTINUE13]]: -; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP34]], %[[PRED_LOAD_IF12]] ] -; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10) +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] +; CHECK: [[PRED_STORE_CONTINUE7]]: +; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10) ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; CHECK: [[PRED_STORE_IF14]]: +; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] +; CHECK: [[PRED_STORE_IF8]]: ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0 ; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; CHECK: [[PRED_STORE_CONTINUE15]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] +; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_IF16]]: +; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] +; CHECK: [[PRED_STORE_IF10]]: ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1 ; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_CONTINUE17]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] +; CHECK: [[PRED_STORE_CONTINUE11]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -319,7 +289,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -327,79 +297,59 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] ; CHECK-NEXT: store i32 10, ptr [[TMP10]], align 4, !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !alias.scope [[META30:![0-9]+]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] -; CHECK: [[PRED_LOAD_IF19]]: +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; CHECK: [[PRED_STORE_CONTINUE20]]: +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META30:![0-9]+]] ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META30]] +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE20]] -; CHECK: [[PRED_LOAD_CONTINUE20]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP14]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF19]] ] -; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP20]], splat (i32 5) +; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5) ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] +; CHECK: [[PRED_STORE_IF21]]: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 ; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] +; CHECK: [[PRED_STORE_CONTINUE22]]: ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] +; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]] -; CHECK: [[PRED_LOAD_IF23]]: -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !alias.scope [[META30]] -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> poison, i32 [[TMP30]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE24]] -; CHECK: [[PRED_LOAD_CONTINUE24]]: -; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE22]] ], [ [[TMP31]], %[[PRED_LOAD_IF23]] ] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP33]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]] -; CHECK: [[PRED_LOAD_IF25]]: -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4, !alias.scope [[META30]] -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP32]], i32 [[TMP35]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE26]] -; CHECK: [[PRED_LOAD_CONTINUE26]]: -; CHECK-NEXT: [[TMP37:%.*]] = phi <2 x i32> [ [[TMP32]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP36]], %[[PRED_LOAD_IF25]] ] -; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP37]], splat (i32 10) +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] +; CHECK: [[PRED_STORE_CONTINUE24]]: +; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]] -; CHECK: [[PRED_STORE_IF27]]: +; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] +; CHECK: [[PRED_STORE_IF25]]: ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP38]], i32 0 ; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_CONTINUE28]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] +; CHECK: [[PRED_STORE_CONTINUE26]]: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]] -; CHECK: [[PRED_STORE_IF29]]: +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]] +; CHECK: [[PRED_STORE_IF27]]: ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP38]], i32 1 ; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]] -; CHECK: [[PRED_STORE_CONTINUE30]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] +; CHECK: [[PRED_STORE_CONTINUE28]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] @@ -609,7 +559,7 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE26:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -617,62 +567,56 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META48:![0-9]+]], !noalias [[META50:![0-9]+]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !alias.scope [[META53:![0-9]+]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] -; CHECK: [[PRED_LOAD_IF19]]: +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; CHECK: [[PRED_STORE_CONTINUE20]]: +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META53:![0-9]+]] ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META53]] +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE20]] -; CHECK: [[PRED_LOAD_CONTINUE20]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP14]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF19]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], splat (i32 10) +; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] +; CHECK: [[PRED_STORE_IF21]]: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 ; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] +; CHECK: [[PRED_STORE_CONTINUE22]]: ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] +; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] -; CHECK: [[PRED_STORE_IF23]]: -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP29]], align 4, !alias.scope [[META53]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP34]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] ; CHECK: [[PRED_STORE_CONTINUE24]]: -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] ; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP33]], align 4, !alias.scope [[META53]] -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP30]], ptr [[TMP35]], align 4, !alias.scope [[META54]], !noalias [[META55]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] ; CHECK: [[PRED_STORE_CONTINUE26]]: +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]] +; CHECK: [[PRED_STORE_IF27]]: +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] +; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP35]], align 4, !alias.scope [[META54]], !noalias [[META55]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] +; CHECK: [[PRED_STORE_CONTINUE28]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP56:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll index e4c893f5269bb..f6dd8564c001b 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll @@ -21,51 +21,20 @@ define void @test(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP34:%.*]] = xor <2 x i1> [[TMP15]], splat (i1 true) -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP34]], i32 0 -; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]] ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP19]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP34]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP36:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP23]], %[[PRED_LOAD_IF6]] ] +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP36]], splat (i32 10) -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP31]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP33]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP33]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP29]], <2 x i32> [[TMP25]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP36]], <2 x i32> [[TMP25]] ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP37]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -450,7 +419,7 @@ exit: ret void } -; Positive test: Same address with different alignments - should hoist with minimum alignment +; Make sure the minimum alignment is used when loads have different alignments. define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) { ; CHECK-LABEL: define void @different_alignments_same_address( ; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND:%.*]]) { @@ -471,53 +440,22 @@ define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META36:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP15]], splat (i1 true) -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0 -; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META39:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 2, !alias.scope [[META39:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 2, !alias.scope [[META39]] ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP19]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META39]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> [[TMP35]], i32 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i32> [ [[TMP35]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP23]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP24]], splat (i32 10) -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP8]], align 2, !alias.scope [[META39]] -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP31]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP9]], align 2, !alias.scope [[META39]] -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP33]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP33]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP29]], <2 x i32> [[TMP25]] -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP34]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]] +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i32> [[TMP25]], splat (i32 10) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP25]], <2 x i32> [[TMP26]] +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP35]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] @@ -642,50 +580,19 @@ define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META46:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]] -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP18]], splat (i32 10) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope [[META49]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]] ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP23]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META49]] -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP24]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP24]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP29]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP29]], <2 x i32> [[TMP19]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]] @@ -752,50 +659,19 @@ define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP6]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META56:![0-9]+]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]] -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP13]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP14]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP15]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], splat (i32 10) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4, !alias.scope [[META59]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]] ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP26:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP25]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 -; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4, !alias.scope [[META59]] -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> [[TMP26]], i32 [[TMP29]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP31:%.*]] = phi <2 x i32> [ [[TMP26]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP30]], %[[PRED_LOAD_IF10]] ] +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP25]], i32 [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP31]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[TMP31]], <2 x i32> [[TMP21]] ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP6]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP32]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]] @@ -1045,55 +921,15 @@ define void @hoist_predicated_load_with_chained_geps1(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE8:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 2, !alias.scope [[META68:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP7]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] -; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i64 8 -; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP11]], align 2, !alias.scope [[META68]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP12]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] -; CHECK: [[PRED_LOAD_CONTINUE4]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], %[[PRED_LOAD_IF3]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] -; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i64 8 -; CHECK-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 2, !alias.scope [[META68]] -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i16> poison, i16 [[TMP17]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] -; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[TMP19:%.*]] = phi <2 x i16> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP18]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_IF7]]: ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i64 8 -; CHECK-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META68]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i16> [[TMP19]], i16 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_CONTINUE8]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i16> [ [[TMP19]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP23]], %[[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], <2 x i16> [[TMP24]], <2 x i16> [[TMP14]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 -; CHECK-NEXT: store i16 [[TMP25]], ptr [[DST]], align 2, !alias.scope [[META71:![0-9]+]], !noalias [[META68]] +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META68:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP4]], ptr [[DST]], align 2, !alias.scope [[META71:![0-9]+]], !noalias [[META68]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP73:![0-9]+]] @@ -1145,55 +981,15 @@ define void @hoist_predicated_load_with_chained_geps2(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE8:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> [[TMP5]], ptr [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 2, !alias.scope [[META75:![0-9]+]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i16> poison, i16 [[TMP9]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP10]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] -; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP13]], align 2, !alias.scope [[META75]] -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i16> [[TMP11]], i16 [[TMP14]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] -; CHECK: [[PRED_LOAD_CONTINUE4]]: -; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i16> [ [[TMP11]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF3]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] -; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8 -; CHECK-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP17]], align 2, !alias.scope [[META75]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] -; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i16> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP19]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_IF7]]: ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META75]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_CONTINUE8]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i16> [ [[TMP20]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP23]], %[[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], <2 x i16> [[TMP24]], <2 x i16> [[TMP16]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 -; CHECK-NEXT: store i16 [[TMP25]], ptr [[DST]], align 2, !alias.scope [[META78:![0-9]+]], !noalias [[META75]] +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META75:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META78:![0-9]+]], !noalias [[META75]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP80:![0-9]+]] @@ -1262,7 +1058,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META62:![0-9]+]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META82:![0-9]+]] ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: @@ -1270,7 +1066,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF2:.*]], label %[[PRED_LOAD_CONTINUE3:.*]] ; CHECK: [[PRED_LOAD_IF2]]: -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP21]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE3]] ; CHECK: [[PRED_LOAD_CONTINUE3]]: @@ -1280,7 +1076,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP25]], i32 0 ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF4:.*]], label %[[PRED_LOAD_CONTINUE5:.*]] ; CHECK: [[PRED_LOAD_IF4]]: -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> poison, i32 [[TMP27]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE5]] ; CHECK: [[PRED_LOAD_CONTINUE5]]: @@ -1288,7 +1084,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP25]], i32 1 ; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] ; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP29]], i32 [[TMP31]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] ; CHECK: [[PRED_LOAD_CONTINUE7]]: @@ -1297,7 +1093,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 ; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] ; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <2 x i32> poison, i32 [[TMP36]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] ; CHECK: [[PRED_LOAD_CONTINUE9]]: @@ -1305,7 +1101,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 ; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] ; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP41:%.*]] = insertelement <2 x i32> [[TMP38]], i32 [[TMP40]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] ; CHECK: [[PRED_LOAD_CONTINUE11]]: @@ -1313,10 +1109,10 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP34]], <2 x i32> [[TMP24]] ; CHECK-NEXT: [[PREDPHI16:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP42]], <2 x i32> [[PREDPHI]] ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI16]], ptr [[TMP43]], align 4, !alias.scope [[META65:![0-9]+]], !noalias [[META62]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI16]], ptr [[TMP43]], align 4, !alias.scope [[META85:![0-9]+]], !noalias [[META82]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP44]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP67:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP44]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP87:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index 73a2203c3115b..eab9df558f608 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -1648,8 +1648,8 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 -3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll index 4e51d6e9ba708..dee377d61ba30 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll @@ -81,8 +81,8 @@ define i32 @induction_trunc_wrapflags(ptr %p) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 326, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 -3 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[VEC_IND]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: store <4 x i8> [[REVERSE]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index bacb8eb10c755..b6fb378a042fd 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -155,7 +155,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -469,20 +469,20 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], [[OFFSET]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP7]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP9]], align 4, !alias.scope [[META4]], !noalias [[META7]] +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP8]], align 4, !alias.scope [[META4]], !noalias [[META7]] ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[OFFSET2]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP11]], align 4, !alias.scope [[META7]] -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP13]], align 4, !alias.scope [[META7]] +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP12]], align 4, !alias.scope [[META7]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD5]] ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD6]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[WIDE_LOAD4]], [[TMP15]] ; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP16]], ptr [[TMP7]], align 4, !alias.scope [[META4]], !noalias [[META7]] -; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP17]], ptr [[TMP9]], align 4, !alias.scope [[META4]], !noalias [[META7]] +; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP17]], ptr [[TMP8]], align 4, !alias.scope [[META4]], !noalias [[META7]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -743,7 +743,7 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; UNROLL-NO-IC-NEXT: [[TMP4]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] @@ -2179,7 +2179,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] @@ -3512,7 +3512,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i8 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP13]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -3897,7 +3897,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i8 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP14]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP16]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4151,7 +4151,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4373,7 +4373,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP9]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -4600,7 +4600,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4815,7 +4815,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -5774,7 +5774,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add <2 x i32> [[STEP_ADD]], [[TMP6]] -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP7]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP7]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP9]], ptr [[TMP11]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -6131,7 +6131,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP20]], ptr [[TMP22]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP21]], ptr [[TMP24]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll index 2c97bb7622740..ffe9da09ca680 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll @@ -19,10 +19,10 @@ define void @i65_induction_with_negative_step(ptr %dst) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[VEC_IND]], <4 x i64> [[STEP_ADD]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 -4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: store <4 x i64> [[REVERSE]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index 69d2aa4c620c1..2200a7d0431d2 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -16,8 +16,8 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) @@ -52,14 +52,14 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 -; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4 -; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3 -; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12 -; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 +; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4 +; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3 +; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -8 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 +; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12 +; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -172,8 +172,8 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]] @@ -498,8 +498,8 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]] @@ -822,8 +822,8 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) @@ -858,14 +858,14 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 -; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4 -; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3 -; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12 -; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 +; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4 +; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3 +; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -8 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 +; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12 +; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll index 24c5602a580da..ba326e254f3cd 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll @@ -82,9 +82,9 @@ define i64 @select_iv_def_from_outer_loop(ptr %a, i64 %start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 [[TMP1]] -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index b991d58eb2b8d..45c2abd43c36a 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -85,9 +85,9 @@ define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -281,9 +281,9 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -424,9 +424,9 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -573,9 +573,9 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 2147483646, [[INDEX]] ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll index 8d3bd267b9482..a071949f82062 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll @@ -69,9 +69,9 @@ define i64 @select_icmp_const_1(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -262,9 +262,9 @@ define i64 @select_icmp_const_2(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -455,9 +455,9 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -648,9 +648,9 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -841,9 +841,9 @@ define i64 @select_fcmp_const(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -1038,17 +1038,17 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 @@ -1257,17 +1257,17 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP9]], align 4 @@ -1481,17 +1481,17 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 @@ -1706,17 +1706,17 @@ define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 4717521980966..b4fd06316a2e5 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1249,8 +1249,8 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 3 -; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0 -; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1 +; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i64 0 +; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i64 -1 ; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2 ; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1 ; VEC-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP5]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 4b3f370f4cdaf..8d3d0ff7a6406 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -296,14 +296,14 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 -1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD1]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 @@ -388,8 +388,8 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 -1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) @@ -544,8 +544,8 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 511, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll index ed027e8b9a895..fed5df2b65228 100644 --- a/llvm/test/Transforms/LoopVectorize/metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/metadata.ll @@ -62,20 +62,20 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0:![0-9]+]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP3]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 9.900000e+01), !fpmath [[META3:![0-9]+]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD1]], splat (double 9.900000e+01), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp oge <2 x double> [[TMP4]], splat (double 1.000000e+01) ; INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp oge <2 x double> [[TMP5]], splat (double 1.000000e+01) -; INTERLEAVE-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP6]], <2 x double> [[WIDE_LOAD]], <2 x double> zeroinitializer, !fpmath [[META3]] +; INTERLEAVE-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP6]], <2 x double> [[WIDE_LOAD]], <2 x double> zeroinitializer, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x double> [[WIDE_LOAD1]], <2 x double> zeroinitializer, !fpmath [[META3]] -; INTERLEAVE-NEXT: [[TMP9:%.*]] = fptrunc <2 x double> [[TMP11]] to <2 x float>, !fpmath [[META3]] +; INTERLEAVE-NEXT: [[TMP9:%.*]] = fptrunc <2 x double> [[TMP12]] to <2 x float>, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP10:%.*]] = fptrunc <2 x double> [[TMP8]] to <2 x float>, !fpmath [[META3]] -; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 2 +; INTERLEAVE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 2 ; INTERLEAVE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP13]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP11]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -154,13 +154,13 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD]]) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD1]]) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP7]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -220,13 +220,13 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP1]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x double> [[TMP3]], ptr [[TMP5]], align 8 ; INTERLEAVE-NEXT: store <2 x double> [[TMP4]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -286,13 +286,13 @@ define void @widen_intrinsic(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4 ; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[WIDE_LOAD]], i1 true) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[WIDE_LOAD1]], i1 true) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP7]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -352,13 +352,13 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP1]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x double> [[TMP3]], ptr [[TMP5]], align 8 ; INTERLEAVE-NEXT: store <2 x double> [[TMP4]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -449,10 +449,10 @@ define void @unknown_metadata(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], <2 x i64> [[VEC_IND]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], <2 x i64> [[STEP_ADD]] -; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 +; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 2 ; INTERLEAVE-NEXT: store <2 x i32> [[VEC_IND1]], ptr [[TMP3]], align 4 ; INTERLEAVE-NEXT: store <2 x i32> [[STEP_ADD3]], ptr [[TMP5]], align 4 -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: store <2 x ptr> [[TMP1]], ptr [[TMP0]], align 8 ; INTERLEAVE-NEXT: store <2 x ptr> [[TMP2]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -582,7 +582,7 @@ define void @noalias_metadata(ptr align 8 %dst, ptr align 8 %src) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP26:%.*]] = mul i64 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP26]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP7]], align 8, !alias.scope [[META14:![0-9]+]] ; INTERLEAVE-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i32 1 ; INTERLEAVE-NEXT: store ptr [[TMP8]], ptr [[DST]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll index 47a2a84b44601..120307629c82e 100644 --- a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll @@ -14,7 +14,7 @@ define float @maximumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) @@ -58,7 +58,7 @@ define float @maximumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call fast <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) @@ -102,7 +102,7 @@ define float @minimumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) @@ -146,7 +146,7 @@ define float @minimumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call fast <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll index 440309d246899..5385a83dfac65 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll @@ -197,10 +197,10 @@ define void @narrow_widen_store_user(i32 %x, ptr noalias %A, ptr noalias %B) { ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]] ; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]] -; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2 +; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 4 -; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 2 +; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP3]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP5]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll index 3bf5c0d1d13a9..1f1bbf7c27ada 100644 --- a/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll @@ -254,7 +254,7 @@ define void @pr52024(ptr %dst, i16 %N) { ; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i16> [[TMP8]] to <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i16> [[TMP9]] to <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 2 ; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store <2 x i32> [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -350,7 +350,7 @@ define void @test_expand_secv_in_entry_before_gep(ptr %dst) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OUTER_IV]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[GEP_M]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 2 ; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[TMP3]], align 8 ; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll index 30d01e8b790a7..4fab2995f14bd 100644 --- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll +++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll @@ -11,14 +11,14 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[TMP4]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -78,14 +78,14 @@ define void @test2(ptr nocapture readonly %d) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0]]) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -165,13 +165,13 @@ define void @predicated_noalias_scope_decl(ptr noalias nocapture readonly %a, pt ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x float> splat (float 2.300000e+01), <4 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP2]], <4 x float> splat (float 2.300000e+01), <4 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[PREDPHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[PREDPHI1]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index b5d74687dc808..1a1c05187590e 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -173,8 +173,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[N]] ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 -3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) @@ -203,8 +203,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], [[N]] ; CHECK-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 0 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 -3 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x float> [[WIDE_LOAD9]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], splat (float 1.000000e+00) diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 5c62ca3ff3d01..d96134e8adf1d 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -38,8 +38,8 @@ define void @a(ptr readnone %b) { ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <4 x i8> [[REVERSE]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index 28d5ef552482b..33b3d263e634a 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -56,8 +56,8 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_CONTINUE3]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i32 -1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i64 -1 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -119,8 +119,8 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i32 -1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 -1 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 565e203e68f72..3276528e54225 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -125,7 +125,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] ; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0 ; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1 -; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 +; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 ; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1 ; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1 ; IC2-NEXT: [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12) @@ -344,21 +344,21 @@ define void @switch_to_header(ptr %start) { ; IC1-NEXT: [[ENTRY:.*]]: ; IC1-NEXT: br label %[[LOOP_HEADER:.*]] ; IC1: [[LOOP_HEADER]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] +; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] ; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC1-NEXT: i64 120, label %[[IF_THEN1]] +; IC1-NEXT: i64 120, label %[[IF_THEN]] ; IC1-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC1-NEXT: ] -; IC1: [[IF_THEN1]]: +; IC1: [[IF_THEN]]: ; IC1-NEXT: br label %[[LOOP_HEADER]] -; IC1: [[IF_THEN:.*:]] +; IC1: [[IF_THEN1:.*:]] ; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison ; IC1-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC1-NEXT: unreachable ; IC1: [[LOOP_LATCH]]: ; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] +; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] ; IC1: [[EXIT]]: ; IC1-NEXT: ret void ; @@ -367,21 +367,21 @@ define void @switch_to_header(ptr %start) { ; IC2-NEXT: [[ENTRY:.*]]: ; IC2-NEXT: br label %[[LOOP_HEADER:.*]] ; IC2: [[LOOP_HEADER]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] +; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] ; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC2-NEXT: i64 120, label %[[IF_THEN1]] +; IC2-NEXT: i64 120, label %[[IF_THEN]] ; IC2-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC2-NEXT: ] -; IC2: [[IF_THEN1]]: +; IC2: [[IF_THEN]]: ; IC2-NEXT: br label %[[LOOP_HEADER]] -; IC2: [[IF_THEN:.*:]] +; IC2: [[IF_THEN1:.*:]] ; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison ; IC2-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC2-NEXT: unreachable ; IC2: [[LOOP_LATCH]]: ; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] +; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; @@ -437,7 +437,7 @@ define void @switch_all_to_default(ptr %start) { ; IC2: [[VECTOR_BODY]]: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[INDEX]] -; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 +; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 ; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP2]], align 1 ; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP5]], align 1 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -497,7 +497,7 @@ define void @switch_unconditional(ptr %start) { ; IC2: [[VECTOR_BODY]]: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[START]], i64 [[INDEX]] -; IC2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2 +; IC2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP2]], i64 2 ; IC2-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP2]], align 4 ; IC2-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP1]], align 4 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index fafa82c211dc6..43dede0b612f3 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -37,7 +37,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -115,11 +115,11 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]]) @@ -203,7 +203,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -286,11 +286,11 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND]]) @@ -381,11 +381,11 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]]) @@ -469,11 +469,11 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 19, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -551,11 +551,11 @@ define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocap ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 120, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COEFF]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]] @@ -632,11 +632,11 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -714,11 +714,11 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]] @@ -794,11 +794,11 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]] @@ -875,11 +875,11 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[WIDE_LOAD]]) @@ -958,11 +958,11 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]]) @@ -1038,7 +1038,7 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 3, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -1122,11 +1122,11 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PREDPHI6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]] @@ -1207,11 +1207,11 @@ for.end: define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-NEXT: [[_LR_PH:.*]]: +; CHECK-NEXT: [[_LR_PH1:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1231,11 +1231,11 @@ define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocaptu ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-INTERLEAVED-NEXT: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED-NEXT: [[_LR_PH1:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1322,11 +1322,11 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]]) @@ -1416,7 +1416,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI1]], splat (i32 255) ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> @@ -1502,7 +1502,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI1]], splat (i32 255) ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> @@ -1606,11 +1606,11 @@ define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ -0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]] @@ -1856,11 +1856,11 @@ define float @reduction_fmuladd_blend(ptr %a, ptr %b, i64 %n, i1 %c) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ -0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]] @@ -2005,7 +2005,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], zeroinitializer @@ -2162,7 +2162,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], zeroinitializer @@ -2268,10 +2268,10 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 @@ -2284,7 +2284,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP23]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -2293,7 +2293,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1 ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]] ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP29]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] @@ -2302,7 +2302,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP19]], i32 2 ; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] ; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]] ; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP35]], i32 2 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] @@ -2311,7 +2311,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP19]], i32 3 ; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] ; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]] +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]] ; CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 ; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP41]], i32 3 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] @@ -2350,14 +2350,14 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP4]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP5]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP6]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP7]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP4]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP5]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP6]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP7]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP10]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP12]], align 4 @@ -2379,7 +2379,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP41]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF]]: -; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]] ; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = insertelement <4 x i32> poison, i32 [[TMP44]], i32 0 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -2388,7 +2388,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP47]], label %[[PRED_LOAD_IF2:.*]], label %[[PRED_LOAD_CONTINUE3:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF2]]: -; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]] +; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> [[TMP46]], i32 [[TMP50]], i32 1 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE3]] @@ -2397,7 +2397,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = extractelement <4 x i1> [[TMP39]], i32 2 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP53]], label %[[PRED_LOAD_IF4:.*]], label %[[PRED_LOAD_CONTINUE5:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF4]]: -; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]] +; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]] ; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = insertelement <4 x i32> [[TMP52]], i32 [[TMP56]], i32 2 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE5]] @@ -2406,7 +2406,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP39]], i32 3 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP59]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF6]]: -; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]] +; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]] ; CHECK-INTERLEAVED-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP61]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP63:%.*]] = insertelement <4 x i32> [[TMP58]], i32 [[TMP62]], i32 3 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE7]] @@ -2415,7 +2415,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP65:%.*]] = extractelement <4 x i1> [[TMP40]], i32 0 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP65]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF8]]: -; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP4]] +; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = insertelement <4 x i32> poison, i32 [[TMP68]], i32 0 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE9]] @@ -2424,7 +2424,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = extractelement <4 x i1> [[TMP40]], i32 1 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP71]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF10]]: -; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP5]] +; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP5]] ; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = insertelement <4 x i32> [[TMP70]], i32 [[TMP74]], i32 1 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE11]] @@ -2433,7 +2433,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = extractelement <4 x i1> [[TMP40]], i32 2 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP77]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF12]]: -; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP6]] ; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP80]], i32 2 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE13]] @@ -2442,7 +2442,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = extractelement <4 x i1> [[TMP40]], i32 3 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP83]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF14]]: -; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP7]] ; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> [[TMP82]], i32 [[TMP86]], i32 3 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE15]] @@ -2543,10 +2543,10 @@ define i32 @reduction_add_sub(ptr noalias nocapture %A, ptr noalias nocapture %B ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub <4 x i32> zeroinitializer, [[WIDE_LOAD3]] @@ -2630,10 +2630,10 @@ define i32 @reduction_sub_add(ptr noalias nocapture %A, ptr noalias nocapture %B ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub <4 x i32> zeroinitializer, [[WIDE_LOAD]] @@ -2739,7 +2739,7 @@ define i64 @reduction_expression_same_operands(ptr nocapture readonly %x, ptr no ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64> @@ -2840,7 +2840,7 @@ define i32 @reduction_expression_ext_mulacc_livein(ptr %a, i16 %c) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll b/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll index daf4cba197cc2..6a8ea3756b6b0 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll @@ -8,8 +8,8 @@ define i32 @reduction_sum(i64 %n, ptr noalias nocapture %A) { ; UF3-NEXT: [[SUM1:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM1_NEXT:%.+]], %vector.body ] ; UF3-NEXT: [[SUM2:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM2_NEXT:%.+]], %vector.body ] ; UF3-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, ptr %A, i64 [[IV]] -; UF3-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 4 -; UF3-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 8 +; UF3-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 4 +; UF3-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 8 ; UF3-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[GEP0]], align 4 ; UF3-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4 ; UF3-NEXT: [[L2:%.+]] = load <4 x i32>, ptr [[L_GEP2]], align 4 @@ -34,10 +34,10 @@ define i32 @reduction_sum(i64 %n, ptr noalias nocapture %A) { ; UF5-NEXT: [[SUM3:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM3_NEXT:%.+]], %vector.body ] ; UF5-NEXT: [[SUM4:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM4_NEXT:%.+]], %vector.body ] ; UF5-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, ptr %A, i64 [[IV]] -; UF5-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 4 -; UF5-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 8 -; UF5-NEXT: [[L_GEP3:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 12 -; UF5-NEXT: [[L_GEP4:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 16 +; UF5-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 4 +; UF5-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 8 +; UF5-NEXT: [[L_GEP3:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 12 +; UF5-NEXT: [[L_GEP4:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 16 ; UF5-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[GEP0]], align 4 ; UF5-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4 ; UF5-NEXT: [[L2:%.+]] = load <4 x i32>, ptr [[L_GEP2]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll index faca86a41b023..a2649053680d2 100644 --- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll @@ -36,8 +36,8 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[IV_LCSSA]], [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[SRC_2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 -1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x ptr> [[WIDE_LOAD]], <2 x ptr> poison, <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[REVERSE]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll index 0896848905c6c..71c75e52d4050 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll @@ -14,8 +14,8 @@ define i32 @preserve_inbounds(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] @@ -60,8 +60,8 @@ define i32 @preserve_nusw(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] @@ -106,8 +106,8 @@ define i32 @drop_nuw(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index 31129d3bcc2f4..d3e291e4f3ed2 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -20,10 +20,10 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4 @@ -74,10 +74,10 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i128 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4 @@ -134,10 +134,10 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]] ; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 -4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 @@ -221,10 +221,10 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> @@ -274,10 +274,10 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll index 830bd92c70b16..5a1844ac450e7 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll @@ -39,12 +39,12 @@ define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], -8 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START_2_DIFF]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 -3 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP15]], i32 -3 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP15]], i64 -3 ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i64> [[REVERSE]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: store <4 x i64> [[REVERSE4]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll index 1c16ef1114371..af272955abbd2 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -99,8 +99,8 @@ define void @diff_memcheck_known_false_for_vf_4(ptr %B, ptr %A, ptr %end) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], -8 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll index 25f40be238338..5be2b09a504c0 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll @@ -996,14 +996,14 @@ define void @decreasing_inner_iv(ptr nocapture noundef %dst, ptr nocapture nound ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[TMP0]], [[INDEX]] ; CHECK-NEXT: [[TMP21:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP16]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 -3 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META46:![0-9]+]] ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP17]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 -3 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4, !alias.scope [[META49:![0-9]+]], !noalias [[META46]] ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = add nsw <4 x i32> [[REVERSE4]], [[REVERSE]] diff --git a/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll index 7811b17f1b7e1..306bcf336e5af 100644 --- a/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll @@ -24,7 +24,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; NO-IC: %[[T4:.+]] = add nuw nsw i64 [[OFFSET_IDX]], %tmp0 ; NO-IC: %[[T6:.+]] = sub nsw i64 %[[T4]], %x ; NO-IC: %[[T8:.+]] = getelementptr inbounds i32, ptr %a, i64 %[[T6]] -; NO-IC: %[[T12:.+]] = getelementptr inbounds i32, ptr %[[T8]], i32 4 +; NO-IC: %[[T12:.+]] = getelementptr inbounds i32, ptr %[[T8]], i64 4 ; NO-IC: load <4 x i32>, ptr %[[T8]], align 4 ; NO-IC: load <4 x i32>, ptr %[[T12]], align 4 ; NO-IC: br {{.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll index 64e12cc8c9cb8..fa03c62bb4927 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll @@ -88,7 +88,7 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer @@ -307,7 +307,7 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer @@ -593,9 +593,9 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ] ; CHECK-VF4-IC2-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ] ; CHECK-VF4-IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4 +; CHECK-VF4-IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]] -; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META6]] +; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] @@ -947,7 +947,7 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no ; CHECK-VF4-IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index e4922d3e4f627..2b352abe9f7a1 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -63,9 +63,9 @@ define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 @@ -249,9 +249,9 @@ define i32 @select_const_i32_from_icmp2(ptr %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 @@ -435,9 +435,9 @@ define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 @@ -621,9 +621,9 @@ define i32 @select_const_i32_from_fcmp_fast(ptr %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -807,9 +807,9 @@ define i32 @select_const_i32_from_fcmp(ptr %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll index d8e62c7b3b8d4..2183c520d9e81 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll @@ -14,9 +14,9 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4: [[VECTOR_BODY]]: ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index 053863117bdc8..bdf73d6a52c22 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -14,9 +14,9 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4: vector.body: ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 @@ -88,17 +88,17 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 @@ -183,9 +183,9 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4: vector.body: ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 4 -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 12 +; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 8 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[NEXT_GEP]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 @@ -268,17 +268,17 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 @@ -365,17 +365,17 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 @@ -467,17 +467,17 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 @@ -569,14 +569,14 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -4 -; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 -3 -; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -8 -; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -12 -; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 -3 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4 +; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3 +; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8 +; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12 +; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i64 -3 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -586,14 +586,14 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP27]], align 1 ; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 0 -; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 -3 -; VF4IC4-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP41]], i32 -3 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 -3 -; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -12 -; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 -3 +; VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 0 +; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i64 -3 +; VF4IC4-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -4 +; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP41]], i64 -3 +; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -8 +; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3 +; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -12 +; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3 ; VF4IC4-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i8>, ptr [[TMP46]], align 1 ; VF4IC4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD14]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 @@ -698,17 +698,17 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4: vector.body: ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 @@ -807,14 +807,14 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -4 -; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 -3 -; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -8 -; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3 -; VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -12 -; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 -3 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0 +; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4 +; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3 +; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8 +; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 +; VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12 +; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 -3 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 ; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -824,14 +824,14 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1 ; VF4IC4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i32 -3 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 -3 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 -3 -; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -12 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 -3 +; VF4IC4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 0 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 -3 +; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -4 +; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 -3 +; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -8 +; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3 +; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -12 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 55682bc410527..f76634d954dd3 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -1573,13 +1573,13 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i8> [[REVERSE]], [[REVERSE3]] diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll index 1e4598e756645..e10cb2794aadb 100644 --- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -153,8 +153,8 @@ define void @test2(ptr %dst) { ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4294967295 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 -1 ; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll index 8123092df1ccc..b9be77ff224ff 100644 --- a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll +++ b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll @@ -53,7 +53,7 @@ define void @struct_return_1xi64_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 ; VF2IC2-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 ; VF2IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1 @@ -81,7 +81,7 @@ define void @struct_return_1xi64_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2-NEXT: [[TMP25:%.*]] = extractvalue { <2 x i64> } [[TMP13]], 0 ; VF2IC2-NEXT: [[TMP26:%.*]] = extractvalue { <2 x i64> } [[TMP24]], 0 ; VF2IC2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[OUT_A]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i32 2 +; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i64 2 ; VF2IC2-NEXT: store <2 x i64> [[TMP25]], ptr [[TMP27]], align 4 ; VF2IC2-NEXT: store <2 x i64> [[TMP26]], ptr [[TMP29]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -166,7 +166,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl ; VF4-NEXT: store <4 x float> [[TMP42]], ptr [[TMP45]], align 4 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: [[MIDDLE_BLOCK]]: ; ; VF2IC2-LABEL: define void @struct_return_2xf32_replicate( @@ -178,7 +178,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 ; VF2IC2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 ; VF2IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1 @@ -224,16 +224,16 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2-NEXT: [[TMP43:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP21]], 1 ; VF2IC2-NEXT: [[TMP44:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP40]], 1 ; VF2IC2-NEXT: [[TMP45:%.*]] = getelementptr inbounds float, ptr [[OUT_A]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP45]], i32 2 +; VF2IC2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP45]], i64 2 ; VF2IC2-NEXT: store <2 x float> [[TMP41]], ptr [[TMP45]], align 4 ; VF2IC2-NEXT: store <2 x float> [[TMP42]], ptr [[TMP47]], align 4 ; VF2IC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[OUT_B]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 2 +; VF2IC2-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 2 ; VF2IC2-NEXT: store <2 x float> [[TMP43]], ptr [[TMP48]], align 4 ; VF2IC2-NEXT: store <2 x float> [[TMP44]], ptr [[TMP50]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF2IC2-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF2IC2: [[MIDDLE_BLOCK]]: ; entry: @@ -336,7 +336,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl ; VF4-NEXT: store <4 x i32> [[TMP63]], ptr [[TMP64]], align 4 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: [[MIDDLE_BLOCK]]: ; ; VF2IC2-LABEL: define void @struct_return_3xi32_replicate( @@ -348,7 +348,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 ; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 ; VF2IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 @@ -408,24 +408,24 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2-NEXT: [[TMP57:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 0 ; VF2IC2-NEXT: [[TMP58:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 0 ; VF2IC2-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[DST_A]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP59]], i32 2 +; VF2IC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP59]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[TMP57]], ptr [[TMP59]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[TMP58]], ptr [[TMP61]], align 4 ; VF2IC2-NEXT: [[TMP62:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 1 ; VF2IC2-NEXT: [[TMP63:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 1 ; VF2IC2-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[DST_B]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP64]], i32 2 +; VF2IC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP64]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[TMP62]], ptr [[TMP64]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[TMP63]], ptr [[TMP66]], align 4 ; VF2IC2-NEXT: [[TMP67:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 2 ; VF2IC2-NEXT: [[TMP68:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 2 ; VF2IC2-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[DST_C]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i32 2 +; VF2IC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[TMP67]], ptr [[TMP69]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[TMP68]], ptr [[TMP71]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF2IC2-NEXT: [[TMP72:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2IC2: [[MIDDLE_BLOCK]]: ; entry: @@ -580,7 +580,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) { ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] ; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 8 ; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP1]], align 8 ; VF2IC2-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll index 9adcba3c0d024..31c37a8abe845 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll @@ -26,7 +26,7 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) { ; VF8UF1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]] ; VF8UF1: [[MIDDLE_BLOCK]]: ; VF8UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH:.*]], !prof [[PROF4:![0-9]+]] +; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH:.*]], !prof [[PROF5:![0-9]+]] ; VF8UF1: [[SCALAR_PH]]: ; VF8UF1-NEXT: br label %[[LOOP:.*]] ; VF8UF1: [[LOOP]]: @@ -38,7 +38,7 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) { ; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1 ; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]] +; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; VF8UF1: [[EXIT]]: ; VF8UF1-NEXT: ret void ; @@ -53,7 +53,7 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) { ; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]] ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10) diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index 2f6c2cc275b72..b1dc6bdcb1d38 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -42,7 +42,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer @@ -141,7 +141,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer @@ -257,7 +257,7 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll index 2317af5619749..cce9ed2783e4e 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll @@ -11,7 +11,7 @@ define i64 @remove_loop_region_int_iv_used_outside(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -44,7 +44,7 @@ define i64 @remove_loop_region_int_iv_inc_used_outside(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -78,7 +78,7 @@ define ptr @remove_loop_region_ptr_iv_used_outside(ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -114,7 +114,7 @@ define ptr @remove_loop_region_ptr_iv_inc_used_outside(ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index bba459f776050..5da6fc3179043 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -512,10 +512,10 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 -; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i32 8 +; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 8 ; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD]], ptr [[DST]], align 1 ; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD1]], ptr [[TMP5]], align 1 ; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -1140,7 +1140,7 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16 ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10) diff --git a/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll b/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll new file mode 100644 index 0000000000000..4f459209b3683 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6 +; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=4 -S %s | FileCheck %s + +target datalayout = "p1:16:16" + +define void @vector_pointer_gep_idxty_addrspace(ptr addrspace(1) noalias %a, ptr addrspace(1) noalias %b) { +; CHECK-LABEL: define void @vector_pointer_gep_idxty_addrspace( +; CHECK-SAME: ptr addrspace(1) noalias [[A:%.*]], ptr addrspace(1) noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i16 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i16 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i16 6 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP0]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[WIDE_LOAD1]], splat (i32 1) +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1) +; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD3]], splat (i32 1) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i16 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i16 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i16 6 +; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr addrspace(1) [[TMP8]], align 4 +; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr addrspace(1) [[TMP9]], align 4 +; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP10]], align 4 +; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr addrspace(1) [[TMP11]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.b.iv = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %iv + %ld.b = load i32, ptr addrspace(1) %gep.b.iv + %add = add i32 %ld.b, 1 + %gep.a.iv = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %iv + store i32 %add, ptr addrspace(1) %gep.a.iv + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv, 1024 + br i1 %exitcond, label %end, label %loop + +end: + ret void +} diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll index db0cc24c287bc..f1a039dc033ac 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll @@ -20,10 +20,16 @@ define float @sinf(float %x) { ; CHECK: declare void @acosf(...) +; CHECK: declare noalias noundef ptr @calloc(i64 noundef, i64 noundef) [[CALLOC_ATTRS:#[0-9]+]] + ; CHECK: declare void @fdim(...) ; CHECK: declare void @fdimf(...) ; CHECK: declare void @fdiml(...) +; CHECK: declare void @free(ptr allocptr noundef captures(none)) [[FREE_ATTRS:#[0-9]+]] + +; CHECK: declare noalias noundef ptr @malloc(i64 noundef) [[MALLOC_ATTRS:#[0-9]+]] + ; CHECK: declare void @nan(...) ; CHECK: declare void @nanf(...) ; CHECK: declare void @nanl(...) @@ -58,3 +64,6 @@ define float @sinf(float %x) { ; CHECK: declare void @truncl(...) +; CHECK: attributes [[CALLOC_ATTRS]] = { mustprogress nofree nounwind willreturn allockind("alloc") allocsize(0,1) "alloc-family"="malloc" } +; CHECK: attributes [[FREE_ATTRS]] = { mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" } +; CHECK: attributes [[MALLOC_ATTRS]] = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll new file mode 100644 index 0000000000000..6c63f5902f638 --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll @@ -0,0 +1,22 @@ +; REQUIRES: aarch64-registered-target, arm-registered-target, x86-registered-target + +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.5 < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.4 < %s | FileCheck -check-prefix=NO-MEMSET-PATTERN %s + +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-apple-macosx10.5 < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-apple-macosx10.4 < %s | FileCheck -check-prefix=NO-MEMSET-PATTERN %s + +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64-apple-ios3 < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64-apple-ios2 < %s | FileCheck -check-prefix=NO-MEMSET-PATTERN %s + +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=thumbv7-apple-ios3 < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=thumbv7-apple-ios2 < %s | FileCheck -check-prefix=NO-MEMSET-PATTERN %s + +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64_32-apple-watchos < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=armv7k-apple-watchos < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s + +; HAS-MEMSET-PATTERN: declare void @memset_pattern16(...) +; HAS-MEMSET-PATTERN: declare void @memset_pattern4(...) +; HAS-MEMSET-PATTERN: declare void @memset_pattern8(...) + +; NO-MEMSET-PATTERN-NOT: memset_pattern diff --git a/llvm/test/tools/opt/infer-data-layout-target-abi.ll b/llvm/test/tools/opt/infer-data-layout-target-abi.ll new file mode 100644 index 0000000000000..45be56393ac11 --- /dev/null +++ b/llvm/test/tools/opt/infer-data-layout-target-abi.ll @@ -0,0 +1,9 @@ +; REQUIRES: mips-registered-target +;; Check that we infer the correct datalayout from a target triple +; RUN: opt -mtriple=mips64-- -S -passes=no-op-module -target-abi=n32 < %s | FileCheck -check-prefix=N32 %s +; RUN: opt -mtriple=mips64-- -S -passes=no-op-module -target-abi=n64 < %s | FileCheck -check-prefix=N64 %s + +target datalayout = "" + +; N32: target datalayout = "E-m:e-p:32:32-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +; N64: target datalayout = "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp index afc20bed25914..b544ae7a35c66 100644 --- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp +++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp @@ -119,7 +119,7 @@ Instruction::create(const MCInstrInfo &InstrInfo, Operand.IsDef = (OpIndex < Description->getNumDefs()); Operand.IsEarlyClobber = (Description->getOperandConstraint(OpIndex, MCOI::EARLY_CLOBBER) != -1); - // TODO(gchatelet): Handle isLookupPtrRegClass. + // TODO(gchatelet): Handle LookupRegClassByHwMode. if (OpInfo.RegClass >= 0) Operand.Tracker = &RATC.getRegisterClass(OpInfo.RegClass); int TiedToIndex = Description->getOperandConstraint(OpIndex, MCOI::TIED_TO); diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index 63c47151389b5..f8be9f16aada6 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -37,6 +37,7 @@ #include "llvm/InitializePasses.h" #include "llvm/LinkAllIR.h" #include "llvm/LinkAllPasses.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Passes/PassPlugin.h" #include "llvm/Remarks/HotnessThresholdParser.h" @@ -516,6 +517,8 @@ optMain(int argc, char **argv, codegen::MaybeEnableStatistics(); + StringRef ABIName = mc::getABIName(); // FIXME: Handle module flag. + // Load the input module... auto SetDataLayout = [&](StringRef IRTriple, StringRef IRLayout) -> std::optional { @@ -541,7 +544,7 @@ optMain(int argc, char **argv, Triple TT(TripleStr); - std::string Str = TT.computeDataLayout(); + std::string Str = TT.computeDataLayout(ABIName); if (Str.empty()) { errs() << argv[0] << ": warning: failed to infer data layout from target triple\n"; @@ -677,9 +680,7 @@ optMain(int argc, char **argv, RTLIB::RuntimeLibcallsInfo RTLCI(ModuleTriple, codegen::getExceptionModel(), codegen::getFloatABIForCalls(), - codegen::getEABIVersion(), - "", // FIXME: Get ABI name from MCOptions - VecLib); + codegen::getEABIVersion(), ABIName, VecLib); // The -disable-simplify-libcalls flag actually disables all builtin optzns. if (DisableSimplifyLibCalls) diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index 8251c8983cc80..364817fa6d030 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -1830,10 +1830,6 @@ bool TreePatternNode::UpdateNodeTypeFromInst(unsigned ResNo, return UpdateNodeType(ResNo, getValueTypeByHwMode(R, T.getHwModes()), TP); } - // PointerLikeRegClass has a type that is determined at runtime. - if (Operand->isSubClassOf("PointerLikeRegClass")) - return UpdateNodeType(ResNo, MVT::iPTR, TP); - // Both RegisterClass and RegisterOperand operands derive their types from a // register class def. const Record *RC = nullptr; @@ -2413,12 +2409,6 @@ static TypeSetByHwMode getImplicitType(const Record *R, unsigned ResNo, const CodeGenHwModes &CGH = CDP.getTargetInfo().getHwModes(); return TypeSetByHwMode(getValueTypeByHwMode(T, CGH)); } - if (R->isSubClassOf("PointerLikeRegClass")) { - assert(ResNo == 0 && "Regclass can only have one result!"); - TypeSetByHwMode VTS(MVT::iPTR); - TP.getInfer().expandOverloads(VTS); - return VTS; - } if (R->getName() == "node" || R->getName() == "srcvalue" || R->getName() == "zero_reg" || R->getName() == "immAllOnesV" || @@ -3661,8 +3651,7 @@ void CodeGenDAGPatterns::FindPatternInputsAndOutputs( if (Val->getDef()->isSubClassOf("RegisterClassLike") || Val->getDef()->isSubClassOf("ValueType") || - Val->getDef()->isSubClassOf("RegisterOperand") || - Val->getDef()->isSubClassOf("PointerLikeRegClass")) { + Val->getDef()->isSubClassOf("RegisterOperand")) { if (Dest->getName().empty()) I.error("set destination must have a name!"); if (!InstResults.insert_or_assign(Dest->getName(), Dest).second) diff --git a/llvm/utils/TableGen/Common/InstructionEncoding.cpp b/llvm/utils/TableGen/Common/InstructionEncoding.cpp index 30bbac463c0f4..e9c2d93244155 100644 --- a/llvm/utils/TableGen/Common/InstructionEncoding.cpp +++ b/llvm/utils/TableGen/Common/InstructionEncoding.cpp @@ -35,9 +35,6 @@ InstructionEncoding::findOperandDecoderMethod(const Record *Record) { Decoder = "Decode" + Record->getName().str() + "RegisterClass"; } else if (Record->isSubClassOf("RegClassByHwMode")) { Decoder = "Decode" + Record->getName().str() + "RegClassByHwMode"; - } else if (Record->isSubClassOf("PointerLikeRegClass")) { - Decoder = "DecodePointerLikeRegClass" + - utostr(Record->getValueAsInt("RegClassKind")); } return {Decoder, true}; diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp index 7f23488cb9df1..d1c63d787de71 100644 --- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp @@ -239,7 +239,6 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode &N) { if ( // Handle register references. Nothing to do here, they always match. LeafRec->isSubClassOf("RegisterClassLike") || LeafRec->isSubClassOf("RegisterOperand") || - LeafRec->isSubClassOf("PointerLikeRegClass") || LeafRec->isSubClassOf("SubRegIndex") || // Place holder for SRCVALUE nodes. Nothing to do here. LeafRec->getName() == "srcvalue") diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index 843b63070959c..9cd6ad28b1be4 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -162,21 +162,29 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) { Res += ", "; } else if (OpR->isSubClassOf("RegisterClass")) Res += getQualifiedName(OpR) + "RegClassID, "; - else if (OpR->isSubClassOf("PointerLikeRegClass")) - Res += utostr(OpR->getValueAsInt("RegClassKind")) + ", "; - else + else if (OpR->isSubClassOf("PointerLikeRegClass")) { + if (Inst.isPseudo) { + // TODO: Verify this is a fixed pseudo + PrintError(Inst.TheDef, + "missing target override for pseudoinstruction " + "using PointerLikeRegClass"); + PrintNote(OpR->getLoc(), + "target should define equivalent instruction " + "with RegisterClassLike replacement; (use " + "RemapAllTargetPseudoPointerOperands?)"); + } else { + PrintError(Inst.TheDef, + "non-pseudoinstruction user of PointerLikeRegClass"); + } + } else // -1 means the operand does not have a fixed register class. Res += "-1, "; // Fill in applicable flags. Res += "0"; - if (OpR->isSubClassOf("RegClassByHwMode")) { + if (OpR->isSubClassOf("RegClassByHwMode")) Res += "|(1<isSubClassOf("PointerLikeRegClass")) { - // Ptr value whose register class is resolved via callback. - Res += "|(1<(createCanonicalizerPass()); + pm.addNestedPass(createCSEPass()); // gpu-module-to-binary { GpuModuleToBinaryPassOptions gpuToModuleBinOptions; diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 4ac3e75b9c1ce..020b2aa68a357 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -1563,6 +1563,7 @@ cc_library( ":basic", ":config", ":driver_options_inc_gen", + ":frontend", ":lex", ":options", ":parse", @@ -1718,7 +1719,6 @@ cc_library( ":ast", ":basic", ":config", - ":driver", ":driver_options_inc_gen", ":edit", ":lex",