diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh
index 219979dd3e36e..5fb8f69528e89 100755
--- a/.ci/monolithic-windows.sh
+++ b/.ci/monolithic-windows.sh
@@ -23,8 +23,8 @@ runtimes_targets="${4}"
 start-group "CMake"
 pip install -q -r "${MONOREPO_ROOT}"/.ci/all_requirements.txt
 
-export CC=cl
-export CXX=cl
+export CC=C:/clang/clang-msvc/bin/clang-cl.exe
+export CXX=C:/clang/clang-msvc/bin/clang-cl.exe
 export LD=link
 
 # The CMAKE_*_LINKER_FLAGS to disable the manifest come from research
@@ -49,6 +49,7 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
       -D CMAKE_EXE_LINKER_FLAGS="/MANIFEST:NO" \
       -D CMAKE_MODULE_LINKER_FLAGS="/MANIFEST:NO" \
       -D CMAKE_SHARED_LINKER_FLAGS="/MANIFEST:NO" \
+      -D CMAKE_CXX_FLAGS="-Wno-c++98-compat -Wno-c++14-compat -Wno-unsafe-buffer-usage -Wno-old-style-cast" \
       -D LLVM_ENABLE_RUNTIMES="${runtimes}"
 
 start-group "ninja"
diff --git a/.ci/utils.sh b/.ci/utils.sh
index dc8ce9b9a4214..540acfa8d5cc5 100644
--- a/.ci/utils.sh
+++ b/.ci/utils.sh
@@ -40,13 +40,17 @@ function at-exit {
   fi
 
   if [[ "$retcode" != "0" ]]; then
-    python "${MONOREPO_ROOT}"/.ci/premerge_advisor_upload.py \
-      $(git rev-parse HEAD~1) $GITHUB_RUN_NUMBER \
-      "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log
     if [[ "$GITHUB_ACTIONS" != "" ]]; then
       python "${MONOREPO_ROOT}"/.ci/premerge_advisor_explain.py \
         $(git rev-parse HEAD~1) "${BUILD_DIR}"/test-results.*.xml \
         "${MONOREPO_ROOT}"/ninja*.log
+      python "${MONOREPO_ROOT}"/.ci/premerge_advisor_upload.py \
+        $(git rev-parse HEAD~1) $GITHUB_RUN_NUMBER \
+        "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log
+    else
+      python "${MONOREPO_ROOT}"/.ci/premerge_advisor_upload.py \
+        $(git rev-parse HEAD) $BUILDBOT_BUILDNUMBER \
+        "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log
     fi
   fi
 }
diff --git a/.github/workflows/containers/github-action-ci-tooling/Dockerfile b/.github/workflows/containers/github-action-ci-tooling/Dockerfile
index 9d2aaf6bbd48a..8aaa2e88f2bab 100644
--- a/.github/workflows/containers/github-action-ci-tooling/Dockerfile
+++ b/.github/workflows/containers/github-action-ci-tooling/Dockerfile
@@ -37,6 +37,14 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
+# Create a new user with id 1001 as that is the user id that
+# Github Actions uses to perform the checkout action.
+RUN useradd gha -u 1001 -m -s /bin/bash
+RUN adduser gha sudo
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+# Don't set USER gha right away because we still need to install packages
+# as root in 'ci-container-code-format' and 'ci-container-code-lint' containers
+
 
 FROM base AS ci-container-code-format
 ARG LLVM_VERSION
@@ -51,6 +59,8 @@ ENV PATH=${LLVM_SYSROOT}/bin:${PATH}
 COPY llvm/utils/git/requirements_formatting.txt requirements_formatting.txt
 RUN pip install -r requirements_formatting.txt --break-system-packages && \
     rm requirements_formatting.txt
+USER gha
+WORKDIR /home/gha
 
 
 FROM base AS ci-container-code-lint
@@ -80,3 +90,5 @@ RUN apt-get update && \
 COPY llvm/utils/git/requirements_linting.txt requirements_linting.txt
 RUN pip install -r requirements_linting.txt --break-system-packages && \
     rm requirements_linting.txt
+USER gha
+WORKDIR /home/gha
diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index 2b85d8b59869c..ac0689b4d3243 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -25,14 +25,6 @@ jobs:
         with:
           fetch-depth: 2
 
-      # We need to set the repo checkout as safe, otherwise tj-actions/changed-files
-      # will fail due to the changed ownership inside the container.
-      # TODO(boomanaiden154): We should probably fix this by having the default user
-      # in the container have the same ID as the GHA user on the host.
-      - name: Set Safe Directory
-        run: |
-          chown -R root $(pwd)
-
       - name: Get changed files
         id: changed-files
         uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
diff --git a/.github/workflows/pr-code-lint.yml b/.github/workflows/pr-code-lint.yml
index e67b518149c2c..8ba9378703739 100644
--- a/.github/workflows/pr-code-lint.yml
+++ b/.github/workflows/pr-code-lint.yml
@@ -31,11 +31,6 @@ jobs:
         with:
           fetch-depth: 2
       
-      # FIXME: same as in ".github/workflows/pr-code-format.yml"
-      - name: Set Safe Directory
-        run: |
-          chown -R root $(pwd)
-      
       - name: Get changed files
         id: changed-files
         uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index e82b16f24c73f..fb22ad3c90af4 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -11,7 +11,7 @@ Introduction
 ============
 
 The Clang Compiler is an open-source compiler for the C family of
-programming languages, aiming to be the best in class implementation of
+programming languages, aiming to be the best-in-class implementation of
 these languages. Clang builds on the LLVM optimizer and code generator,
 allowing it to provide high-quality optimization and code generation
 support for many targets. For more general information, please see the
@@ -56,7 +56,7 @@ migration from GCC to Clang. In most cases, code "just works".
 Clang also provides an alternative driver, :ref:`clang-cl`, that is designed
 to be compatible with the Visual C++ compiler, cl.exe.
 
-In addition to language specific features, Clang has a variety of
+In addition to language-specific features, Clang has a variety of
 features that depend on what CPU architecture or operating system is
 being compiled for. Please see the :ref:`Target-Specific Features and
 Limitations <target_features>` section for more details.
@@ -299,7 +299,7 @@ output format of the diagnostics that it generates.
 
    This option controls the output format of the filename, line number,
    and column printed in diagnostic messages. The options, and their
-   affect on formatting a simple conversion diagnostic, follow:
+   effect on formatting a simple conversion diagnostic, follow:
 
    **clang** (default)
        ::
@@ -360,7 +360,7 @@ output format of the diagnostics that it generates.
          t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat,Format String]
 
    This category can be used by clients that want to group diagnostics
-   by category, so it should be a high level category. We want dozens
+   by category, so it should be a high-level category. We want dozens
    of these, not hundreds or thousands of them.
 
 .. _opt_fsave-optimization-record:
@@ -750,7 +750,7 @@ control the crash diagnostics.
 
   Disable auto-generation of preprocessed source files during a clang crash.
 
-  The -fno-crash-diagnostics flag can be helpful for speeding the process
+  The ``-fno-crash-diagnostics`` flag can be helpful for speeding the process
   of generating a delta reduced test case.
 
 .. option:: -fcrash-diagnostics-dir=<dir>
@@ -779,7 +779,7 @@ Options to Emit Optimization Reports
 ------------------------------------
 
 Optimization reports trace, at a high-level, all the major decisions
-done by compiler transformations. For instance, when the inliner
+made by compiler transformations. For instance, when the inliner
 decides to inline function ``foo()`` into ``bar()``, or the loop unroller
 decides to unroll a loop N times, or the vectorizer decides to
 vectorize a loop body.
@@ -845,11 +845,11 @@ compilations steps.
 
 .. option:: -fproc-stat-report=
 
-  This option requests driver to print used memory and execution time of each
+  This option requests the driver to print used memory and execution time of each
   compilation step. The ``clang`` driver during execution calls different tools,
   like compiler, assembler, linker etc. With this option the driver reports
   total execution time, the execution time spent in user mode and peak memory
-  usage of each the called tool. Value of the option specifies where the report
+  usage of each called tool. Value of the option specifies where the report
   is sent to. If it specifies a regular file, the data are saved to this file in
   CSV format:
 
@@ -869,7 +869,7 @@ compilations steps.
   * peak memory usage in Kb.
 
   It is possible to specify this option without any value. In this case statistics
-  are printed on standard output in human readable format:
+  are printed on standard output in human-readable format:
 
   .. code-block:: console
 
@@ -884,7 +884,7 @@ compilations steps.
 
   You can also use environment variables to control the process statistics reporting.
   Setting ``CC_PRINT_PROC_STAT`` to ``1`` enables the feature, the report goes to
-  stdout in human readable format.
+  stdout in human-readable format.
   Setting ``CC_PRINT_PROC_STAT_FILE`` to a fully qualified file path makes it report
   process statistics to the given file in the CSV format. Specifying a relative
   path will likely lead to multiple files with the same name created in different
@@ -922,7 +922,7 @@ Clang options that don't fit neatly into other categories.
   most filenames can be written to the file without any special formatting.
   Different Make tools will treat different sets of characters as "special"
   and use different conventions for telling the Make tool that the character
-  is actually part of the filename. Normally Clang uses backslash to "escape"
+  is actually part of the filename. Normally, Clang uses backslash to "escape"
   a special character, which is the convention used by GNU Make. The -MV
   option tells Clang to put double-quotes around the entire filename, which
   is the convention used by NMake and Jom.
@@ -957,7 +957,7 @@ Configuration files
 
 Configuration files group command-line options and allow all of them to be
 specified just by referencing the configuration file. They may be used, for
-example, to collect options required to tune compilation for particular
+example, to collect options required to tune compilation for a particular
 target, such as ``-L``, ``-I``, ``-l``, ``--sysroot``, codegen options, etc.
 
 Configuration files can be either specified on the command line or loaded
@@ -986,7 +986,7 @@ either during build or during runtime. At build time, use
 ``CLANG_CONFIG_FILE_USER_DIR`` and ``CLANG_CONFIG_FILE_SYSTEM_DIR``. At run
 time use the ``--config-user-dir=`` and ``--config-system-dir=`` command line
 options. Specifying config directories at runtime overrides the config
-directories set at build time The first file found is used. It is an error if
+directories set at build time. The first file found is used. It is an error if
 the required file cannot be found.
 
 The default configuration files are searched for in the same directories
@@ -996,7 +996,7 @@ the ``--no-default-config`` flag.
 
 First, the algorithm searches for a configuration file named
 ``<triple>-<driver>.cfg`` where `triple` is the triple for the target being
-built for, and `driver` is the name of the currently used driver. The algorithm
+built, and `driver` is the name of the currently used driver. The algorithm
 first attempts to use the canonical name for the driver used, then falls back
 to the one found in the executable name.
 
@@ -1047,7 +1047,7 @@ It is not an error if either of these files is not found.
 The configuration file consists of command-line options specified on one or
 more lines. Lines composed of whitespace characters only are ignored as well as
 lines in which the first non-blank character is ``#``. Long options may be split
-between several lines by a trailing backslash. Here is example of a
+between several lines by a trailing backslash. Here is an example of a
 configuration file:
 
 ::
@@ -1229,7 +1229,7 @@ Clang also allows you to push and pop the current warning state. This is
 particularly useful when writing a header file that will be compiled by
 other people, because you don't know what warning flags they build with.
 
-In the below example :option:`-Wextra-tokens` is ignored for only a single line
+In the example below, :option:`-Wextra-tokens` is ignored for only a single line
 of code, after which the diagnostics return to whatever state had previously
 existed.
 
@@ -1253,7 +1253,7 @@ of warnings, so even when using GCC-compatible #pragmas there is no
 guarantee that they will have identical behaviour on both compilers.
 
 Clang also doesn't yet support GCC behavior for ``#pragma diagnostic pop``
-that doesn't have a corresponding ``#pragma diagnostic push``. In this case
+that doesn't have a corresponding ``#pragma diagnostic push``. In this case,
 GCC pretends that there is a ``#pragma diagnostic push`` at the very beginning
 of the source file, so "unpaired" ``#pragma diagnostic pop`` matches that
 implicit push. This makes a difference for ``#pragma GCC diagnostic ignored``
@@ -1406,7 +1406,7 @@ project even if there are violations in some headers.
   # directory. But it'll still complain for all the other sources, e.g:
   $ cat foo/bar.cc
   #include "dir/include.h" // Clang flags unused declarations here.
-  #include "foo/include.h" // but unused warnings under this source is omitted.
+  #include "foo/include.h" // but unused warnings under this source are omitted.
   #include "next_to_bar_cc.h" // as are unused warnings from this header file.
   // Further, unused warnings in the remainder of bar.cc are also omitted.
 
@@ -1648,7 +1648,7 @@ for more details.
 
 .. option:: -fno-fast-math
 
-   Disable fast-math mode.  This options disables unsafe floating-point
+   Disable fast-math mode.  This option disables unsafe floating-point
    optimizations by preventing the compiler from making any transformations that
    could affect the results.
 
@@ -1766,7 +1766,7 @@ for more details.
 
    * ``fast``: enable fusion across statements disregarding pragmas, breaking
      compliance with the C and C++ standards (default for CUDA).
-   * ``on``: enable C and C++ standard complaint fusion in the same statement
+   * ``on``: enable C and C++ standard compliant fusion in the same statement
      unless dictated by pragmas (default for languages other than CUDA/HIP)
    * ``off``: disable fusion
    * ``fast-honor-pragmas``: fuse across statements unless dictated by pragmas
@@ -1919,7 +1919,7 @@ for more details.
    a single expression of the code.
 
    Valid values are: ``source``, ``double``, and ``extended``.
-   For 64-bit targets, the default value is ``source``. For 32-bit x86 targets
+   For 64-bit targets, the default value is ``source``. For 32-bit x86 targets,
    however, in the case of NETBSD 6.99.26 and under, the default value is
    ``double``; in the case of NETBSD greater than 6.99.26, with NoSSE, the
    default value is ``extended``, with SSE the default value is ``source``.
@@ -3881,9 +3881,9 @@ See :doc:`LanguageExtensions`.
 Differences between various standard modes
 ------------------------------------------
 
-clang supports the -std option, which changes what language mode clang uses.
+clang supports the ``-std`` option, which changes what language mode clang uses.
 The supported modes for C are c89, gnu89, c94, c99, gnu99, c11, gnu11, c17,
-gnu17, c23, gnu23, c2y, gnu2y, and various aliases for those modes. If no -std
+gnu17, c23, gnu23, c2y, gnu2y, and various aliases for those modes. If no ``-std``
 option is specified, clang defaults to gnu17 mode. Many C99 and C11 features
 are supported in earlier modes as a conforming extension, with a warning. Use
 ``-pedantic-errors`` to request an error if a feature from a later standard
@@ -4609,7 +4609,7 @@ codebases.
 
 On ``x86_64-mingw32``, passing i128(by value) is incompatible with the
 Microsoft x64 calling convention. You might need to tweak
-``WinX86_64ABIInfo::classify()`` in lib/CodeGen/Targets/X86.cpp.
+``WinX86_64ABIInfo::classify()`` in ``lib/CodeGen/Targets/X86.cpp``.
 
 For the X86 target, clang supports the `-m16` command line
 argument which enables 16-bit code output. This is broadly similar to
@@ -4760,8 +4760,8 @@ is imported, the linker will generate fixup code for reading or writing to the
 variable.
 
 When multiple toc-data options are used, the last option used has the affect.
-For example: -mno-tocdata=g5,g1 -mtocdata=g1,g2 -mno-tocdata=g2 -mtocdata=g3,g4
-results in -mtocdata=g1,g3,g4
+For example: ``-mno-tocdata=g5,g1 -mtocdata=g1,g2 -mno-tocdata=g2 -mtocdata=g3,g4``
+results in ``-mtocdata=g1,g3,g4``
 
 Names of variables not having external linkage will be ignored.
 
@@ -5143,16 +5143,16 @@ Execute ``clang-cl /?`` to see a list of supported options:
                               Instrument only functions from files where names match any regex separated by a semi-colon
       -fprofile-generate=<dirname>
                               Generate instrumented code to collect execution counts into a raw profile file in the directory specified by the argument. The filename uses default_%m.profraw pattern
-                              (overridden by LLVM_PROFILE_FILE env var)
+                              (overridden by ``LLVM_PROFILE_FILE`` env var)
       -fprofile-generate
                               Generate instrumented code to collect execution counts into default_%m.profraw file
-                              (overridden by '=' form of option or LLVM_PROFILE_FILE env var)
+                              (overridden by '=' form of option or ``LLVM_PROFILE_FILE`` env var)
       -fprofile-instr-generate=<file_name_pattern>
                               Generate instrumented code to collect execution counts into the file whose name pattern is specified as the argument
-                              (overridden by LLVM_PROFILE_FILE env var)
+                              (overridden by ``LLVM_PROFILE_FILE`` env var)
       -fprofile-instr-generate
                               Generate instrumented code to collect execution counts into default.profraw file
-                              (overridden by '=' form of option or LLVM_PROFILE_FILE env var)
+                              (overridden by '=' form of option or ``LLVM_PROFILE_FILE`` env var)
       -fprofile-instr-use=<value>
                               Use instrumentation data for coverage testing or profile-guided optimization
       -fprofile-use=<value>
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 369ac3dc37b56..48ef8be9fb782 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -150,11 +150,9 @@ struct MissingFeatures {
   static bool zeroSizeRecordMembers() { return false; }
 
   // Coroutines
-  static bool coroAllocBuiltinCall() { return false; }
-  static bool coroBeginBuiltinCall() { return false; }
   static bool coroEndBuiltinCall() { return false; }
-  static bool coroSizeBuiltinCall() { return false; }
   static bool coroutineFrame() { return false; }
+  static bool emitBodyAndFallthrough() { return false; }
 
   // Various handling of deferred processing in CIRGenModule.
   static bool cgmRelease() { return false; }
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 189798f71dbad..52904c72d1cfc 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2681,11 +2681,6 @@ class Sema final : public SemaBase {
   /// function without this attribute.
   bool DiscardingCFIUncheckedCallee(QualType From, QualType To) const;
 
-  /// Returns true if `From` is a function or pointer to a function without the
-  /// `cfi_unchecked_callee` attribute but `To` is a function or pointer to
-  /// function with this attribute.
-  bool AddingCFIUncheckedCallee(QualType From, QualType To) const;
-
   /// This function calls Action when it determines that E designates a
   /// misaligned member due to the packed attribute. This is used to emit
   /// local diagnostics like in reference binding.
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 62fa04e15c717..e35100ffe4b6b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -449,10 +449,15 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   }
   case Builtin::BI__builtin_coro_free:
   case Builtin::BI__builtin_coro_size: {
-    cgm.errorNYI(e->getSourceRange(),
-                 "BI__builtin_coro_free, BI__builtin_coro_size NYI");
-    assert(!cir::MissingFeatures::coroSizeBuiltinCall());
-    return getUndefRValue(e->getType());
+    GlobalDecl gd{fd};
+    mlir::Type ty = cgm.getTypes().getFunctionType(
+        cgm.getTypes().arrangeGlobalDeclaration(gd));
+    const auto *nd = cast<NamedDecl>(gd.getDecl());
+    cir::FuncOp fnOp =
+        cgm.getOrCreateCIRFunction(nd->getName(), ty, gd, /*ForVTable=*/false);
+    fnOp.setBuiltin(true);
+    return emitCall(e->getCallee()->getType(), CIRGenCallee::forDirect(fnOp), e,
+                    returnValue);
   }
   case Builtin::BI__builtin_prefetch: {
     auto evaluateOperandAsInt = [&](const Expr *arg) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
index c25cce4ab33b3..8723a6e502b38 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
@@ -15,6 +15,7 @@
 #include "clang/AST/StmtCXX.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -23,6 +24,9 @@ struct clang::CIRGen::CGCoroData {
   // Stores the __builtin_coro_id emitted in the function so that we can supply
   // it as the first argument to other builtins.
   cir::CallOp coroId = nullptr;
+
+  // Stores the result of __builtin_coro_begin call.
+  mlir::Value coroBegin = nullptr;
 };
 
 // Defining these here allows to keep CGCoroData private to this file.
@@ -63,6 +67,46 @@ cir::CallOp CIRGenFunction::emitCoroIDBuiltinCall(mlir::Location loc,
                                                nullPtr, nullPtr, nullPtr});
 }
 
+cir::CallOp CIRGenFunction::emitCoroAllocBuiltinCall(mlir::Location loc) {
+  cir::BoolType boolTy = builder.getBoolTy();
+
+  mlir::Operation *builtin = cgm.getGlobalValue(cgm.builtinCoroAlloc);
+
+  cir::FuncOp fnOp;
+  if (!builtin) {
+    fnOp = cgm.createCIRBuiltinFunction(loc, cgm.builtinCoroAlloc,
+                                        cir::FuncType::get({UInt32Ty}, boolTy),
+                                        /*fd=*/nullptr);
+    assert(fnOp && "should always succeed");
+  } else {
+    fnOp = cast<cir::FuncOp>(builtin);
+  }
+
+  return builder.createCallOp(
+      loc, fnOp, mlir::ValueRange{curCoro.data->coroId.getResult()});
+}
+
+cir::CallOp
+CIRGenFunction::emitCoroBeginBuiltinCall(mlir::Location loc,
+                                         mlir::Value coroframeAddr) {
+  mlir::Operation *builtin = cgm.getGlobalValue(cgm.builtinCoroBegin);
+
+  cir::FuncOp fnOp;
+  if (!builtin) {
+    fnOp = cgm.createCIRBuiltinFunction(
+        loc, cgm.builtinCoroBegin,
+        cir::FuncType::get({UInt32Ty, VoidPtrTy}, VoidPtrTy),
+        /*fd=*/nullptr);
+    assert(fnOp && "should always succeed");
+  } else {
+    fnOp = cast<cir::FuncOp>(builtin);
+  }
+
+  return builder.createCallOp(
+      loc, fnOp,
+      mlir::ValueRange{curCoro.data->coroId.getResult(), coroframeAddr});
+}
+
 mlir::LogicalResult
 CIRGenFunction::emitCoroutineBody(const CoroutineBodyStmt &s) {
   mlir::Location openCurlyLoc = getLoc(s.getBeginLoc());
@@ -73,10 +117,39 @@ CIRGenFunction::emitCoroutineBody(const CoroutineBodyStmt &s) {
   cir::CallOp coroId = emitCoroIDBuiltinCall(openCurlyLoc, nullPtrCst);
   createCoroData(*this, curCoro, coroId);
 
-  assert(!cir::MissingFeatures::coroAllocBuiltinCall());
-
-  assert(!cir::MissingFeatures::coroBeginBuiltinCall());
+  // Backend is allowed to elide memory allocations, to help it, emit
+  // auto mem = coro.alloc() ? 0 : ... allocation code ...;
+  cir::CallOp coroAlloc = emitCoroAllocBuiltinCall(openCurlyLoc);
+
+  // Initialize address of coroutine frame to null
+  CanQualType astVoidPtrTy = cgm.getASTContext().VoidPtrTy;
+  mlir::Type allocaTy = convertTypeForMem(astVoidPtrTy);
+  Address coroFrame =
+      createTempAlloca(allocaTy, getContext().getTypeAlignInChars(astVoidPtrTy),
+                       openCurlyLoc, "__coro_frame_addr",
+                       /*ArraySize=*/nullptr);
+
+  mlir::Value storeAddr = coroFrame.getPointer();
+  builder.CIRBaseBuilderTy::createStore(openCurlyLoc, nullPtrCst, storeAddr);
+  cir::IfOp::create(
+      builder, openCurlyLoc, coroAlloc.getResult(),
+      /*withElseRegion=*/false,
+      /*thenBuilder=*/[&](mlir::OpBuilder &b, mlir::Location loc) {
+        builder.CIRBaseBuilderTy::createStore(
+            loc, emitScalarExpr(s.getAllocate()), storeAddr);
+        cir::YieldOp::create(builder, loc);
+      });
+  curCoro.data->coroBegin =
+      emitCoroBeginBuiltinCall(
+          openCurlyLoc,
+          cir::LoadOp::create(builder, openCurlyLoc, allocaTy, storeAddr))
+          .getResult();
+
+  // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided.
+  if (s.getReturnStmtOnAllocFailure())
+    cgm.errorNYI("handle coroutine return alloc failure");
 
   assert(!cir::MissingFeatures::generateDebugInfo());
+  assert(!cir::MissingFeatures::emitBodyAndFallthrough());
   return mlir::success();
 }
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index a8ffab79e8398..d7911302df45c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1332,6 +1332,9 @@ class CIRGenFunction : public CIRGenTypeCache {
   mlir::LogicalResult emitCoroutineBody(const CoroutineBodyStmt &s);
   cir::CallOp emitCoroEndBuiltinCall(mlir::Location loc, mlir::Value nullPtr);
   cir::CallOp emitCoroIDBuiltinCall(mlir::Location loc, mlir::Value nullPtr);
+  cir::CallOp emitCoroAllocBuiltinCall(mlir::Location loc);
+  cir::CallOp emitCoroBeginBuiltinCall(mlir::Location loc,
+                                       mlir::Value coroframeAddr);
 
   void emitDestroy(Address addr, QualType type, Destroyer *destroyer);
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 1fc116d98a858..186913d1bac9d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -496,6 +496,8 @@ class CIRGenModule : public CIRGenTypeCache {
                                     bool assumeConvergent = false);
 
   static constexpr const char *builtinCoroId = "__builtin_coro_id";
+  static constexpr const char *builtinCoroAlloc = "__builtin_coro_alloc";
+  static constexpr const char *builtinCoroBegin = "__builtin_coro_begin";
 
   /// Given a builtin id for a function like "__builtin_fabsf", return a
   /// Function* for "fabsf".
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 1d0dfd0b9c151..a8a9c51952fbd 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3791,12 +3791,18 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
   if (Current.is(TT_FunctionDeclarationName))
     return true;
 
-  if (Current.isNoneOf(tok::identifier, tok::kw_operator))
+  if (!Current.Tok.getIdentifierInfo())
     return false;
 
   const auto *Prev = Current.getPreviousNonComment();
   assert(Prev);
 
+  if (Prev->is(tok::coloncolon))
+    Prev = Prev->Previous;
+
+  if (!Prev)
+    return false;
+
   const auto &Previous = *Prev;
 
   if (const auto *PrevPrev = Previous.getPreviousNonComment();
@@ -3845,8 +3851,6 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
 
   // Find parentheses of parameter list.
   if (Current.is(tok::kw_operator)) {
-    if (Line.startsWith(tok::kw_friend))
-      return true;
     if (Previous.Tok.getIdentifierInfo() &&
         Previous.isNoneOf(tok::kw_return, tok::kw_co_return)) {
       return true;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ab893a5e9bf11..380a852207c21 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -12375,14 +12375,9 @@ static void DiagnoseMixedUnicodeImplicitConversion(Sema &S, const Type *Source,
   }
 }
 
-enum CFIUncheckedCalleeChange {
-  None,
-  Adding,
-  Discarding,
-};
-
-static CFIUncheckedCalleeChange AdjustingCFIUncheckedCallee(QualType From,
-                                                            QualType To) {
+bool Sema::DiscardingCFIUncheckedCallee(QualType From, QualType To) const {
+  From = Context.getCanonicalType(From);
+  To = Context.getCanonicalType(To);
   QualType MaybePointee = From->getPointeeType();
   if (!MaybePointee.isNull() && MaybePointee->getAs<FunctionType>())
     From = MaybePointee;
@@ -12394,25 +12389,10 @@ static CFIUncheckedCalleeChange AdjustingCFIUncheckedCallee(QualType From,
     if (const auto *ToFn = To->getAs<FunctionType>()) {
       if (FromFn->getCFIUncheckedCalleeAttr() &&
           !ToFn->getCFIUncheckedCalleeAttr())
-        return Discarding;
-      if (!FromFn->getCFIUncheckedCalleeAttr() &&
-          ToFn->getCFIUncheckedCalleeAttr())
-        return Adding;
+        return true;
     }
   }
-  return None;
-}
-
-bool Sema::DiscardingCFIUncheckedCallee(QualType From, QualType To) const {
-  From = Context.getCanonicalType(From);
-  To = Context.getCanonicalType(To);
-  return ::AdjustingCFIUncheckedCallee(From, To) == Discarding;
-}
-
-bool Sema::AddingCFIUncheckedCallee(QualType From, QualType To) const {
-  From = Context.getCanonicalType(From);
-  To = Context.getCanonicalType(To);
-  return ::AdjustingCFIUncheckedCallee(From, To) == Adding;
+  return false;
 }
 
 void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 06e5dab35cc3e..6d011239ec813 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -2533,15 +2533,12 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
 
   SCS.setToType(2, FromType);
 
-  // If we have not converted the argument type to the parameter type,
-  // this is a bad conversion sequence, unless we're resolving an overload in C.
-  //
-  // Permit conversions from a function without `cfi_unchecked_callee` to a
-  // function with `cfi_unchecked_callee`.
-  if (CanonFrom == CanonTo || S.AddingCFIUncheckedCallee(CanonFrom, CanonTo))
+  if (CanonFrom == CanonTo)
     return true;
 
-  if ((S.getLangOpts().CPlusPlus || !InOverloadResolution))
+  // If we have not converted the argument type to the parameter type,
+  // this is a bad conversion sequence, unless we're resolving an overload in C.
+  if (S.getLangOpts().CPlusPlus || !InOverloadResolution)
     return false;
 
   ExprResult ER = ExprResult{From};
diff --git a/clang/test/CIR/CodeGen/coro-task.cpp b/clang/test/CIR/CodeGen/coro-task.cpp
index 1fc7d77be2bce..265325f82d7f7 100644
--- a/clang/test/CIR/CodeGen/coro-task.cpp
+++ b/clang/test/CIR/CodeGen/coro-task.cpp
@@ -106,6 +106,9 @@ co_invoke_fn co_invoke;
 // CIR-NEXT: cir.global external @_ZN5folly4coro9co_invokeE = #cir.zero : !rec_folly3A3Acoro3A3Aco_invoke_fn
 
 // CIR: cir.func builtin private @__builtin_coro_id(!u32i, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>) -> !u32i
+// CIR:  cir.func builtin private @__builtin_coro_alloc(!u32i) -> !cir.bool
+// CIR:  cir.func builtin private @__builtin_coro_size() -> !u64i
+// CIR:  cir.func builtin private @__builtin_coro_begin(!u32i, !cir.ptr<!void>) -> !cir.ptr<!void>
 
 using VoidTask = folly::coro::Task<void>;
 
@@ -114,10 +117,24 @@ VoidTask silly_task() {
 }
 
 // CIR: cir.func coroutine dso_local @_Z10silly_taskv() -> ![[VoidTask]]
-// CHECK: %[[#VoidTaskAddr:]] = cir.alloca ![[VoidTask]], {{.*}}, ["__retval"]
+// CIR: %[[VoidTaskAddr:.*]] = cir.alloca ![[VoidTask]], {{.*}}, ["__retval"]
+// CIR: %[[SavedFrameAddr:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["__coro_frame_addr"]
 
 // Get coroutine id with __builtin_coro_id.
 
 // CIR: %[[NullPtr:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
 // CIR: %[[Align:.*]] = cir.const #cir.int<16> : !u32i
 // CIR: %[[CoroId:.*]] = cir.call @__builtin_coro_id(%[[Align]], %[[NullPtr]], %[[NullPtr]], %[[NullPtr]])
+
+// Perform allocation calling operator 'new' depending on __builtin_coro_alloc and
+// call __builtin_coro_begin for the final coroutine frame address.
+
+// CIR: %[[ShouldAlloc:.*]] = cir.call @__builtin_coro_alloc(%[[CoroId]]) : (!u32i) -> !cir.bool
+// CIR: cir.store{{.*}} %[[NullPtr]], %[[SavedFrameAddr]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR: cir.if %[[ShouldAlloc]] {
+// CIR:   %[[CoroSize:.*]] = cir.call @__builtin_coro_size() : () -> !u64i
+// CIR:   %[[AllocAddr:.*]] = cir.call @_Znwm(%[[CoroSize]]) : (!u64i) -> !cir.ptr<!void>
+// CIR:   cir.store{{.*}} %[[AllocAddr]], %[[SavedFrameAddr]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR: }
+// CIR: %[[Load0:.*]] = cir.load{{.*}} %[[SavedFrameAddr]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: %[[CoroFrameAddr:.*]] = cir.call @__builtin_coro_begin(%[[CoroId]], %[[Load0]])
diff --git a/clang/test/Frontend/cfi-unchecked-callee-attribute.cpp b/clang/test/Frontend/cfi-unchecked-callee-attribute.cpp
index 072f217ff7b19..a5a17dd5a4d82 100644
--- a/clang/test/Frontend/cfi-unchecked-callee-attribute.cpp
+++ b/clang/test/Frontend/cfi-unchecked-callee-attribute.cpp
@@ -9,6 +9,7 @@ void (*checked_ptr)(void) = unchecked;  // expected-warning{{implicit conversion
 void (CFI_UNCHECKED_CALLEE *unchecked_ptr)(void) = unchecked;
 void (CFI_UNCHECKED_CALLEE *from_normal)(void) = checked;
 void (CFI_UNCHECKED_CALLEE *c_no_function_decay)(void) = &unchecked;
+void (CFI_UNCHECKED_CALLEE __attribute__((noreturn)) *other_conflict)(void) = &checked; // expected-error{{cannot initialize a variable of type 'void (*)() __attribute__((noreturn)) __attribute__((cfi_unchecked_callee))' with an rvalue of type 'void (*)()'}}
 void (CFI_UNCHECKED_CALLEE *arr[10])(void);
 void (*cfi_elem)(void) = arr[1];  // expected-warning{{implicit conversion from 'void (*)() __attribute__((cfi_unchecked_callee))' to 'void (*)()' discards 'cfi_unchecked_callee' attribute}}
 void (CFI_UNCHECKED_CALLEE *cfi_unchecked_elem)(void) = arr[1];
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index ca99940890984..f3637383a0a65 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1129,11 +1129,6 @@ TEST_F(TokenAnnotatorTest, UnderstandsOverloadedOperators) {
   ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   // Not TT_FunctionDeclarationName.
   EXPECT_TOKEN(Tokens[3], tok::kw_operator, TT_Unknown);
-
-  Tokens = annotate("SomeAPI::operator()();");
-  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
-  // Not TT_FunctionDeclarationName.
-  EXPECT_TOKEN(Tokens[2], tok::kw_operator, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) {
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index be64ef3770c60..bb47f31060885 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -3274,13 +3274,13 @@ struct FunctionReference {
 // R1521 call-stmt -> CALL procedure-designator [ chevrons ]
 //         [( [actual-arg-spec-list] )]
 // (CUDA) chevrons -> <<< * | scalar-expr, scalar-expr [,
-//          scalar-int-expr [, scalar-int-expr ] ] >>>
+//          scalar-expr [, scalar-int-expr ] ] >>>
 struct CallStmt {
   BOILERPLATE(CallStmt);
   WRAPPER_CLASS(StarOrExpr, std::optional<ScalarExpr>);
   struct Chevrons {
     TUPLE_CLASS_BOILERPLATE(Chevrons);
-    std::tuple<StarOrExpr, ScalarExpr, std::optional<ScalarIntExpr>,
+    std::tuple<StarOrExpr, ScalarExpr, std::optional<ScalarExpr>,
         std::optional<ScalarIntExpr>>
         t;
   };
diff --git a/flang/lib/Parser/program-parsers.cpp b/flang/lib/Parser/program-parsers.cpp
index 92c0a64b39a9d..740dbbfab9db7 100644
--- a/flang/lib/Parser/program-parsers.cpp
+++ b/flang/lib/Parser/program-parsers.cpp
@@ -484,7 +484,7 @@ constexpr auto starOrExpr{
         applyFunction(presentOptional<ScalarExpr>, scalarExpr))};
 TYPE_PARSER(extension<LanguageFeature::CUDA>(
     "<<<" >> construct<CallStmt::Chevrons>(starOrExpr, ", " >> scalarExpr,
-                 maybe("," >> scalarIntExpr), maybe("," >> scalarIntExpr)) /
+                 maybe("," >> scalarExpr), maybe("," >> scalarIntExpr)) /
         ">>>"))
 constexpr auto actualArgSpecList{optionalList(actualArgSpec)};
 TYPE_CONTEXT_PARSER("CALL statement"_en_US,
diff --git a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf
index 71e594e4742ec..e0941f74072ba 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf
@@ -16,6 +16,7 @@ contains
   subroutine host()
     real, device :: a
     integer(8) :: stream
+    integer(4) :: nbytes
 
 ! CHECK-LABEL: func.func @_QMtest_callPhost()
 ! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
@@ -57,6 +58,10 @@ contains
     call dev_kernel1<<<*,32,0,stream>>>(a)
 ! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel1<<<%c-1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}, %c0{{.*}}, %{{.*}} : !fir.ref<i64>>>>(%{{.*}}) : (!fir.ref<f32>)
 
+    call dev_kernel1<<<*, 32, 0.8 * nbytes>>>(a)
+! CHECK: %[[MUL:.*]] = arith.mulf %{{.*}}, %{{.*}} fastmath<contract> : f32
+! CHECK: %[[BYTES:.*]] = fir.convert %[[MUL]] : (f32) -> i32
+! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel1<<<%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[BYTES]]>>>(%{{.*}}) : (!fir.ref<f32>)
   end
 
 end
diff --git a/flang/test/Parser/cuf-sanity-common b/flang/test/Parser/cuf-sanity-common
index 816e03bed7220..2348c2edf3b73 100644
--- a/flang/test/Parser/cuf-sanity-common
+++ b/flang/test/Parser/cuf-sanity-common
@@ -43,6 +43,7 @@ module m
     call globalsub<<<1, 2>>>
     call globalsub<<<1, 2, 3>>>
     call globalsub<<<1, 2, 3, 4>>>
+    call globalsub<<<1, 2, 0.9*10, 4>>>
     call globalsub<<<*,5>>>
     allocate(pa(32), pinned = isPinned)
   end subroutine
diff --git a/flang/test/Parser/cuf-sanity-tree.CUF b/flang/test/Parser/cuf-sanity-tree.CUF
index 83d7540b8dec5..b4d53f27cf395 100644
--- a/flang/test/Parser/cuf-sanity-tree.CUF
+++ b/flang/test/Parser/cuf-sanity-tree.CUF
@@ -178,7 +178,7 @@ include "cuf-sanity-common"
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
 !CHECK: | | | | | | Scalar -> Expr = '2_4'
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '2'
-!CHECK: | | | | | | Scalar -> Integer -> Expr = '3_4'
+!CHECK: | | | | | | Scalar -> Expr = '3_4'
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '3'
 !CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL globalsub<<<1_4,2_4,3_4,4_4>>>()'
 !CHECK: | | | | | Call
@@ -188,10 +188,27 @@ include "cuf-sanity-common"
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
 !CHECK: | | | | | | Scalar -> Expr = '2_4'
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '2'
-!CHECK: | | | | | | Scalar -> Integer -> Expr = '3_4'
+!CHECK: | | | | | | Scalar -> Expr = '3_4'
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '3'
 !CHECK: | | | | | | Scalar -> Integer -> Expr = '4_4'
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '4'
+!CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL globalsub<<<1_4,2_4,9._4,4_4>>>()'
+!CHECK: | | | | | Call
+!CHECK: | | | | | | ProcedureDesignator -> Name = 'globalsub'
+!CHECK: | | | | | Chevrons
+!CHECK: | | | | | | StarOrExpr -> Scalar -> Expr = '1_4'
+!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
+!CHECK: | | | | | | Scalar -> Expr = '2_4'
+!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '2'
+!CHECK: | | | | | | Scalar -> Expr = '9._4'
+!CHECK: | | | | | | | Multiply
+!CHECK: | | | | | | | | Expr = '8.9999997615814208984375e-1_4'
+!CHECK: | | | | | | | | | LiteralConstant -> RealLiteralConstant
+!CHECK: | | | | | | | | | | Real = '0.9'
+!CHECK: | | | | | | | | Expr = '10_4'
+!CHECK: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '10'
+!CHECK: | | | | | | Scalar -> Integer -> Expr = '4_4'
+!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '4'
 !CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AllocateStmt
 !CHECK: | | | | | Allocation
 !CHECK: | | | | | | AllocateObject = 'pa'
diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h
index f4a09237ce897..c375df248154f 100644
--- a/lldb/include/lldb/Target/Target.h
+++ b/lldb/include/lldb/Target/Target.h
@@ -1356,7 +1356,11 @@ class Target : public std::enable_shared_from_this<Target>,
     StopHook(const StopHook &rhs);
     virtual ~StopHook() = default;
 
-    enum class StopHookKind  : uint32_t { CommandBased = 0, ScriptBased };
+    enum class StopHookKind : uint32_t {
+      CommandBased = 0,
+      ScriptBased,
+      CodeBased,
+    };
     enum class StopHookResult : uint32_t {
       KeepStopped = 0,
       RequestContinue,
@@ -1403,6 +1407,12 @@ class Target : public std::enable_shared_from_this<Target>,
 
     bool GetRunAtInitialStop() const { return m_at_initial_stop; }
 
+    void SetSuppressOutput(bool suppress_output) {
+      m_suppress_output = suppress_output;
+    }
+
+    bool GetSuppressOutput() const { return m_suppress_output; }
+
     void GetDescription(Stream &s, lldb::DescriptionLevel level) const;
     virtual void GetSubclassDescription(Stream &s,
                                         lldb::DescriptionLevel level) const = 0;
@@ -1414,6 +1424,7 @@ class Target : public std::enable_shared_from_this<Target>,
     bool m_active = true;
     bool m_auto_continue = false;
     bool m_at_initial_stop = true;
+    bool m_suppress_output = false;
 
     StopHook(lldb::TargetSP target_sp, lldb::user_id_t uid);
   };
@@ -1433,8 +1444,8 @@ class Target : public std::enable_shared_from_this<Target>,
 
   private:
     StringList m_commands;
-    // Use CreateStopHook to make a new empty stop hook. The GetCommandPointer
-    // and fill it with commands, and SetSpecifier to set the specifier shared
+    // Use CreateStopHook to make a new empty stop hook. Use SetActionFromString
+    // to fill it with commands, and SetSpecifier to set the specifier shared
     // pointer (can be null, that will match anything.)
     StopHookCommandLine(lldb::TargetSP target_sp, lldb::user_id_t uid)
         : StopHook(target_sp, uid) {}
@@ -1460,19 +1471,56 @@ class Target : public std::enable_shared_from_this<Target>,
     StructuredDataImpl m_extra_args;
     lldb::ScriptedStopHookInterfaceSP m_interface_sp;
 
-    /// Use CreateStopHook to make a new empty stop hook. The GetCommandPointer
-    /// and fill it with commands, and SetSpecifier to set the specifier shared
-    /// pointer (can be null, that will match anything.)
+    /// Use CreateStopHook to make a new empty stop hook. Use SetScriptCallback
+    /// to set the script to execute, and SetSpecifier to set the specifier
+    /// shared pointer (can be null, that will match anything.)
     StopHookScripted(lldb::TargetSP target_sp, lldb::user_id_t uid)
         : StopHook(target_sp, uid) {}
     friend class Target;
   };
 
+  class StopHookCoded : public StopHook {
+  public:
+    ~StopHookCoded() override = default;
+
+    using HandleStopCallback = StopHookResult(ExecutionContext &exc_ctx,
+                                              lldb::StreamSP output);
+
+    void SetCallback(llvm::StringRef name, HandleStopCallback *callback) {
+      m_name = name;
+      m_callback = callback;
+    }
+
+    StopHookResult HandleStop(ExecutionContext &exc_ctx,
+                              lldb::StreamSP output) override {
+      return m_callback(exc_ctx, output);
+    }
+
+    void GetSubclassDescription(Stream &s,
+                                lldb::DescriptionLevel level) const override {
+      s.Indent();
+      s.Printf("%s (built-in)\n", m_name.c_str());
+    }
+
+  private:
+    std::string m_name;
+    HandleStopCallback *m_callback;
+
+    /// Use CreateStopHook to make a new empty stop hook. Use SetCallback to set
+    /// the callback to execute, and SetSpecifier to set the specifier shared
+    /// pointer (can be null, that will match anything.)
+    StopHookCoded(lldb::TargetSP target_sp, lldb::user_id_t uid)
+        : StopHook(target_sp, uid) {}
+    friend class Target;
+  };
+
+  void RegisterInternalStopHooks();
+
   typedef std::shared_ptr<StopHook> StopHookSP;
 
   /// Add an empty stop hook to the Target's stop hook list, and returns a
-  /// shared pointer to it in new_hook. Returns the id of the new hook.
-  StopHookSP CreateStopHook(StopHook::StopHookKind kind);
+  /// shared pointer to the new hook.
+  StopHookSP CreateStopHook(StopHook::StopHookKind kind, bool internal = false);
 
   /// If you tried to create a stop hook, and that failed, call this to
   /// remove the stop hook, as it will also reset the stop hook counter.
@@ -1484,8 +1532,6 @@ class Target : public std::enable_shared_from_this<Target>,
   // control over the process for the first time.
   bool RunStopHooks(bool at_initial_stop = false);
 
-  size_t GetStopHookSize();
-
   bool SetSuppresStopHooks(bool suppress) {
     bool old_value = m_suppress_stop_hooks;
     m_suppress_stop_hooks = suppress;
@@ -1504,19 +1550,7 @@ class Target : public std::enable_shared_from_this<Target>,
 
   void SetAllStopHooksActiveState(bool active_state);
 
-  size_t GetNumStopHooks() const { return m_stop_hooks.size(); }
-
-  StopHookSP GetStopHookAtIndex(size_t index) {
-    if (index >= GetNumStopHooks())
-      return StopHookSP();
-    StopHookCollection::iterator pos = m_stop_hooks.begin();
-
-    while (index > 0) {
-      pos++;
-      index--;
-    }
-    return (*pos).second;
-  }
+  const std::vector<StopHookSP> GetStopHooks(bool internal = false) const;
 
   lldb::PlatformSP GetPlatform() { return m_platform_sp; }
 
@@ -1656,6 +1690,7 @@ class Target : public std::enable_shared_from_this<Target>,
   typedef std::map<lldb::user_id_t, StopHookSP> StopHookCollection;
   StopHookCollection m_stop_hooks;
   lldb::user_id_t m_stop_hook_next_id;
+  std::vector<StopHookSP> m_internal_stop_hooks;
   uint32_t m_latest_stop_hook_id; /// This records the last natural stop at
                                   /// which we ran a stop-hook.
   bool m_valid;
diff --git a/lldb/source/Commands/CommandCompletions.cpp b/lldb/source/Commands/CommandCompletions.cpp
index b2fc893e13fe3..c60d30326a3b4 100644
--- a/lldb/source/Commands/CommandCompletions.cpp
+++ b/lldb/source/Commands/CommandCompletions.cpp
@@ -777,13 +777,11 @@ void CommandCompletions::StopHookIDs(CommandInterpreter &interpreter,
   if (!target_sp)
     return;
 
-  const size_t num = target_sp->GetNumStopHooks();
-  for (size_t idx = 0; idx < num; ++idx) {
+  for (auto &stophook_sp : target_sp->GetStopHooks()) {
     StreamString strm;
     // The value 11 is an offset to make the completion description looks
     // neater.
     strm.SetIndentLevel(11);
-    const Target::StopHookSP stophook_sp = target_sp->GetStopHookAtIndex(idx);
     stophook_sp->GetDescription(strm, lldb::eDescriptionLevelInitial);
     request.TryCompleteCurrentArg(std::to_string(stophook_sp->GetID()),
                                   strm.GetString());
diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp
index de0a7e7093411..5a5512610cd33 100644
--- a/lldb/source/Commands/CommandObjectBreakpoint.cpp
+++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp
@@ -1114,9 +1114,7 @@ class CommandObjectBreakpointList : public CommandObjectParsed {
   CommandObjectBreakpointList(CommandInterpreter &interpreter)
       : CommandObjectParsed(
             interpreter, "breakpoint list",
-            "List some or all breakpoints at configurable levels of detail.",
-            nullptr) {
-    CommandArgumentData bp_id_arg;
+            "List some or all breakpoints at configurable levels of detail.") {
 
     // Define the first (and only) variant of this arg.
     AddSimpleArgumentList(eArgTypeBreakpointID, eArgRepeatOptional);
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index c59d02812f328..8de6521e65b25 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -5223,33 +5223,72 @@ class CommandObjectTargetStopHookEnableDisable : public CommandObjectParsed {
 #pragma mark CommandObjectTargetStopHookList
 
 // CommandObjectTargetStopHookList
+#define LLDB_OPTIONS_target_stop_hook_list
+#include "CommandOptions.inc"
 
 class CommandObjectTargetStopHookList : public CommandObjectParsed {
 public:
   CommandObjectTargetStopHookList(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "target stop-hook list",
-                            "List all stop-hooks.", "target stop-hook list") {}
+                            "List all stop-hooks.") {}
 
   ~CommandObjectTargetStopHookList() override = default;
 
+  Options *GetOptions() override { return &m_options; }
+
+  class CommandOptions : public Options {
+  public:
+    CommandOptions() = default;
+    ~CommandOptions() override = default;
+
+    Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg,
+                          ExecutionContext *execution_context) override {
+      Status error;
+      const int short_option = m_getopt_table[option_idx].val;
+
+      switch (short_option) {
+      case 'i':
+        m_internal = true;
+        break;
+      default:
+        llvm_unreachable("Unimplemented option");
+      }
+
+      return error;
+    }
+
+    void OptionParsingStarting(ExecutionContext *execution_context) override {
+      m_internal = false;
+    }
+
+    llvm::ArrayRef<OptionDefinition> GetDefinitions() override {
+      return llvm::ArrayRef(g_target_stop_hook_list_options);
+    }
+
+    // Instance variables to hold the values for command options.
+    bool m_internal = false;
+  };
+
 protected:
   void DoExecute(Args &command, CommandReturnObject &result) override {
     Target &target = GetTarget();
 
-    size_t num_hooks = target.GetNumStopHooks();
-    if (num_hooks == 0) {
-      result.GetOutputStream().PutCString("No stop hooks.\n");
-    } else {
-      for (size_t i = 0; i < num_hooks; i++) {
-        Target::StopHookSP this_hook = target.GetStopHookAtIndex(i);
-        if (i > 0)
-          result.GetOutputStream().PutCString("\n");
-        this_hook->GetDescription(result.GetOutputStream(),
-                                  eDescriptionLevelFull);
-      }
+    bool printed_hook = false;
+    for (auto &hook : target.GetStopHooks(m_options.m_internal)) {
+      if (printed_hook)
+        result.GetOutputStream().PutCString("\n");
+      hook->GetDescription(result.GetOutputStream(), eDescriptionLevelFull);
+      printed_hook = true;
     }
+
+    if (!printed_hook)
+      result.GetOutputStream().PutCString("No stop hooks.\n");
+
     result.SetStatus(eReturnStatusSuccessFinishResult);
   }
+
+private:
+  CommandOptions m_options;
 };
 
 #pragma mark CommandObjectMultiwordTargetStopHooks
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index a9f054e1d3d45..ed061312e2bb4 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -77,7 +77,7 @@ let Command = "breakpoint list" in {
   // FIXME: We need to add an "internal" command, and then add this sort of
   // thing to it. But I need to see it for now, and don't want to wait.
   def blist_internal : Option<"internal", "i">,
-                       Desc<"Show debugger ${i}nternal breakpoints">;
+                       Desc<"Show debugger ${i}nternal breakpoints.">;
   def blist_brief : Option<"brief", "b">,
                     Group<1>,
                     Desc<"Give a ${b}rief description of the breakpoint (no "
@@ -1686,7 +1686,7 @@ let Command = "target modules lookup" in {
                                        "match, if a best match is available.">;
 }
 
-let Command = "target stop hook add" in {
+let Command = "target stop_hook add" in {
   def target_stop_hook_add_one_liner
       : Option<"one-liner", "o">,
         GroupRange<1, 3>,
@@ -1762,6 +1762,12 @@ let Command = "target stop hook add" in {
              "Defaults to true.">;
 }
 
+let Command = "target stop_hook list" in {
+  def target_stop_hook_list_internal
+      : Option<"internal", "i">,
+        Desc<"Show debugger ${i}nternal stop hooks.">;
+}
+
 let Command = "thread backtrace" in {
   def thread_backtrace_count : Option<"count", "c">,
                                Group<1>,
diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp
index e224a12e33463..d070c3d953d4a 100644
--- a/lldb/source/Target/Target.cpp
+++ b/lldb/source/Target/Target.cpp
@@ -183,8 +183,8 @@ Target::Target(Debugger &debugger, const ArchSpec &target_arch,
       m_watchpoint_list(), m_process_sp(), m_search_filter_sp(),
       m_image_search_paths(ImageSearchPathsChanged, this),
       m_source_manager_up(), m_stop_hooks(), m_stop_hook_next_id(0),
-      m_latest_stop_hook_id(0), m_valid(true), m_suppress_stop_hooks(false),
-      m_is_dummy_target(is_dummy_target),
+      m_internal_stop_hooks(), m_latest_stop_hook_id(0), m_valid(true),
+      m_suppress_stop_hooks(false), m_is_dummy_target(is_dummy_target),
       m_target_unique_id(g_target_unique_id++),
       m_frame_recognizer_manager_up(
           std::make_unique<StackFrameRecognizerManager>()) {
@@ -217,6 +217,7 @@ Target::~Target() {
 void Target::PrimeFromDummyTarget(Target &target) {
   m_stop_hooks = target.m_stop_hooks;
   m_stop_hook_next_id = target.m_stop_hook_next_id;
+  m_internal_stop_hooks = target.m_internal_stop_hooks;
 
   for (const auto &breakpoint_sp : target.m_breakpoint_list.Breakpoints()) {
     if (breakpoint_sp->IsInternal())
@@ -383,6 +384,7 @@ void Target::Destroy() {
   m_image_search_paths.Clear(notify);
   m_stop_hooks.clear();
   m_stop_hook_next_id = 0;
+  m_internal_stop_hooks.clear();
   m_suppress_stop_hooks = false;
   m_repl_map.clear();
   Args signal_args;
@@ -3041,8 +3043,9 @@ SourceManager &Target::GetSourceManager() {
   return *m_source_manager_up;
 }
 
-Target::StopHookSP Target::CreateStopHook(StopHook::StopHookKind kind) {
-  lldb::user_id_t new_uid = ++m_stop_hook_next_id;
+Target::StopHookSP Target::CreateStopHook(StopHook::StopHookKind kind,
+                                          bool internal) {
+  user_id_t new_uid = (internal ? LLDB_INVALID_UID : ++m_stop_hook_next_id);
   Target::StopHookSP stop_hook_sp;
   switch (kind) {
   case StopHook::StopHookKind::CommandBased:
@@ -3051,8 +3054,14 @@ Target::StopHookSP Target::CreateStopHook(StopHook::StopHookKind kind) {
   case StopHook::StopHookKind::ScriptBased:
     stop_hook_sp.reset(new StopHookScripted(shared_from_this(), new_uid));
     break;
+  case StopHook::StopHookKind::CodeBased:
+    stop_hook_sp.reset(new StopHookCoded(shared_from_this(), new_uid));
+    break;
   }
-  m_stop_hooks[new_uid] = stop_hook_sp;
+  if (internal)
+    m_internal_stop_hooks.push_back(stop_hook_sp);
+  else
+    m_stop_hooks[new_uid] = stop_hook_sp;
   return stop_hook_sp;
 }
 
@@ -3098,6 +3107,23 @@ void Target::SetAllStopHooksActiveState(bool active_state) {
   }
 }
 
+// FIXME:  Ideally we would like to return a `const &` (const reference) instead
+//   of creating copy here, but that is not possible due to different container
+//   types.  In C++20, we should be able to use `std::ranges::views::values` to
+//   adapt the key-pair entries in the `std::map` (behind `StopHookCollection`)
+//   to avoid creating the copy.
+const std::vector<Target::StopHookSP>
+Target::GetStopHooks(bool internal) const {
+  if (internal)
+    return m_internal_stop_hooks;
+
+  std::vector<StopHookSP> stop_hooks;
+  for (auto &[_, hook] : m_stop_hooks)
+    stop_hooks.push_back(hook);
+
+  return stop_hooks;
+}
+
 bool Target::RunStopHooks(bool at_initial_stop) {
   if (m_suppress_stop_hooks)
     return false;
@@ -3111,16 +3137,20 @@ bool Target::RunStopHooks(bool at_initial_stop) {
   if (!(state == eStateStopped || state == eStateAttaching))
     return false;
 
-  if (m_stop_hooks.empty())
-    return false;
+  auto is_active = [at_initial_stop](StopHookSP hook) {
+    bool should_run_now = (!at_initial_stop || hook->GetRunAtInitialStop());
+    return hook->IsActive() && should_run_now;
+  };
 
-  bool no_active_hooks =
-      llvm::none_of(m_stop_hooks, [at_initial_stop](auto &p) {
-        bool should_run_now =
-            !at_initial_stop || p.second->GetRunAtInitialStop();
-        return p.second->IsActive() && should_run_now;
-      });
-  if (no_active_hooks)
+  // Create list of active internal and user stop hooks.
+  std::vector<StopHookSP> active_hooks;
+  llvm::copy_if(m_internal_stop_hooks, std::back_inserter(active_hooks),
+                is_active);
+  for (auto &[_, hook] : m_stop_hooks) {
+    if (is_active(hook))
+      active_hooks.push_back(hook);
+  }
+  if (active_hooks.empty())
     return false;
 
   // Make sure we check that we are not stopped because of us running a user
@@ -3169,24 +3199,21 @@ bool Target::RunStopHooks(bool at_initial_stop) {
   StreamSP output_sp = m_debugger.GetAsyncOutputStream();
   auto on_exit = llvm::make_scope_exit([output_sp] { output_sp->Flush(); });
 
-  bool print_hook_header = (m_stop_hooks.size() != 1);
-  bool print_thread_header = (num_exe_ctx != 1);
+  size_t num_hooks_with_output = llvm::count_if(
+      active_hooks, [](auto h) { return !h->GetSuppressOutput(); });
+  bool print_hook_header = (num_hooks_with_output > 1);
+  bool print_thread_header = (num_exe_ctx > 1);
   bool should_stop = false;
   bool requested_continue = false;
 
-  for (auto stop_entry : m_stop_hooks) {
-    StopHookSP cur_hook_sp = stop_entry.second;
-    if (!cur_hook_sp->IsActive())
-      continue;
-    if (at_initial_stop && !cur_hook_sp->GetRunAtInitialStop())
-      continue;
-
+  for (auto cur_hook_sp : active_hooks) {
     bool any_thread_matched = false;
     for (auto exc_ctx : exc_ctx_with_reasons) {
       if (!cur_hook_sp->ExecutionContextPasses(exc_ctx))
         continue;
 
-      if (print_hook_header && !any_thread_matched) {
+      bool suppress_output = cur_hook_sp->GetSuppressOutput();
+      if (print_hook_header && !any_thread_matched && !suppress_output) {
         StreamString s;
         cur_hook_sp->GetDescription(s, eDescriptionLevelBrief);
         if (s.GetSize() != 0)
@@ -3197,7 +3224,7 @@ bool Target::RunStopHooks(bool at_initial_stop) {
         any_thread_matched = true;
       }
 
-      if (print_thread_header)
+      if (print_thread_header && !suppress_output)
         output_sp->Printf("-- Thread %d\n",
                           exc_ctx.GetThreadPtr()->GetIndexID());
 
diff --git a/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py b/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py
index 8a321b2ff6324..0f40dfd09c958 100644
--- a/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py
+++ b/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py
@@ -40,7 +40,7 @@ def run_arch(self, exe, arch):
         launch_info = target.GetLaunchInfo()
         error = lldb.SBError()
         process = target.Launch(launch_info, error)
-        self.assertTrue(error.Success, str(error))
+        self.assertTrue(error.Success(), str(error))
         self.assertState(process.GetState(), lldb.eStateExited)
         self.assertIn("slice: {}".format(arch), process.GetSTDOUT(1000))
 
diff --git a/lldb/test/Shell/ExecControl/StopHook/stop-hook-list.test b/lldb/test/Shell/ExecControl/StopHook/stop-hook-list.test
new file mode 100644
index 0000000000000..42d0a67c60dfa
--- /dev/null
+++ b/lldb/test/Shell/ExecControl/StopHook/stop-hook-list.test
@@ -0,0 +1,70 @@
+# Test stop hook user ID assignment, ordering, and printing.
+#
+# RUN: %lldb -b -s %s | FileCheck %s
+
+# Create some stop hooks
+target stop-hook add -o 'print "Hello"'
+target stop-hook add -o 'print "world,"'
+target stop-hook add -o 'print "nice"'
+target stop-hook add -o 'print "weather"'
+target stop-hook add -o 'print "today!"'
+
+# Print hooks
+target stop-hook list
+
+# CHECK: (lldb) target stop-hook list
+# CHECK: Hook: 1
+# CHECK:   "Hello"
+# CHECK: Hook: 2
+# CHECK:   "world,"
+# CHECK: Hook: 3
+# CHECK:   "nice"
+# CHECK: Hook: 4
+# CHECK:   "weather"
+# CHECK: Hook: 5
+# CHECK:   "today!"
+
+# Delete last hook, then add new one
+target stop-hook delete 5
+target stop-hook add -o 'print "Sunshine,"'
+
+# Stop hook gets new user ID (it is not reused)
+# CHECK: (lldb) target stop-hook add -o 'print "Sunshine,"'
+# CHECK: Stop hook #6 added.
+
+target stop-hook list
+# CHECK: (lldb) target stop-hook list
+# CHECK:     Hook: 4
+# CHECK-NOT: Hook: 5
+# CHECK:     Hook: 6
+
+# Add a few more hooks
+target stop-hook add -o 'print "rain,"'
+target stop-hook add -o 'print "and wind!"'
+target stop-hook add -o 'print "It is all okay!"'
+# CHECK: Stop hook #7 added.
+# CHECK: Stop hook #8 added.
+# CHECK: Stop hook #9 added.
+
+# Delete a few hooks
+target stop-hook delete 1
+target stop-hook delete 3
+target stop-hook delete 7
+target stop-hook delete 9
+
+# Check that the list is still well-ordered
+target stop-hook list
+# CHECK: (lldb) target stop-hook list
+# CHECK-NOT: Hook: 1
+# CHECK:     Hook: 2
+# CHECK:       "world,"
+# CHECK-NOT: Hook: 3
+# CHECK:     Hook: 4
+# CHECK:       "weather"
+# CHECK-NOT: Hook: 5
+# CHECK:     Hook: 6
+# CHECK:       "Sunshine,"
+# CHECK-NOT: Hook: 7
+# CHECK:     Hook: 8
+# CHECK:       "and wind!"
+# CHECK-NOT: Hook: 9
diff --git a/llvm/docs/DirectX/DXILArchitecture.rst b/llvm/docs/DirectX/DXILArchitecture.rst
index 32b1e72deae7c..bce7fdaa386ed 100644
--- a/llvm/docs/DirectX/DXILArchitecture.rst
+++ b/llvm/docs/DirectX/DXILArchitecture.rst
@@ -118,9 +118,10 @@ The passes to generate DXIL IR follow the flow:
 Each of these passes has a defined responsibility:
 
 #. DXILOpLowering translates LLVM intrinsic calls to dx.op calls.
-#. DXILPrepare transforms the DXIL IR to be compatible with LLVM 3.7, and
-   inserts bitcasts to allow typed pointers to be inserted.
-#. DXILTranslateMetadata emits the DXIL Metadata structures.
+#. DXILPrepare updates functions in the DXIL IR to be compatible with LLVM 3.7,
+   namely removing attributes, and inserting bitcasts to allow typed pointers
+   to be inserted.
+#. DXILTranslateMetadata transforms and emits all recognized DXIL Metadata.
 
 The passes to encode DXIL to binary in the DX Container follow the flow:
 
diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 9cdac9c59fa9b..d99b5843c2133 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -2161,6 +2161,16 @@ that are not simple pointers (use :ref:`SmallPtrSet <dss_smallptrset>` for
 pointers).  Note that ``DenseSet`` has the same requirements for the value type that
 :ref:`DenseMap <dss_densemap>` has.
 
+.. _dss_radixtree:
+
+llvm/ADT/RadixTree.h
+^^^^^^^^^^^^^^^^^^^^
+
+``RadixTree`` is a trie-based data structure that stores range-like keys and
+their associated values. It is particularly efficient for storing keys that
+share common prefixes, as it can compress these prefixes to save memory. It
+supports efficient search of matching prefixes.
+
 .. _dss_sparseset:
 
 llvm/ADT/SparseSet.h
diff --git a/llvm/include/llvm/ADT/IndexedMap.h b/llvm/include/llvm/ADT/IndexedMap.h
index 55935a7afdab4..02193c79a6f0c 100644
--- a/llvm/include/llvm/ADT/IndexedMap.h
+++ b/llvm/include/llvm/ADT/IndexedMap.h
@@ -43,40 +43,40 @@ class IndexedMap {
   // is trivially copyable.
   using StorageT = SmallVector<T, 0>;
 
-  StorageT storage_;
-  T nullVal_ = T();
-  ToIndexT toIndex_;
+  StorageT Storage;
+  T NullVal = T();
+  ToIndexT ToIndex;
 
 public:
   IndexedMap() = default;
 
-  explicit IndexedMap(const T &val) : nullVal_(val) {}
+  explicit IndexedMap(const T &Val) : NullVal(Val) {}
 
-  typename StorageT::reference operator[](IndexT n) {
-    assert(toIndex_(n) < storage_.size() && "index out of bounds!");
-    return storage_[toIndex_(n)];
+  typename StorageT::reference operator[](IndexT N) {
+    assert(ToIndex(N) < Storage.size() && "index out of bounds!");
+    return Storage[ToIndex(N)];
   }
 
-  typename StorageT::const_reference operator[](IndexT n) const {
-    assert(toIndex_(n) < storage_.size() && "index out of bounds!");
-    return storage_[toIndex_(n)];
+  typename StorageT::const_reference operator[](IndexT N) const {
+    assert(ToIndex(N) < Storage.size() && "index out of bounds!");
+    return Storage[ToIndex(N)];
   }
 
-  void reserve(typename StorageT::size_type s) { storage_.reserve(s); }
+  void reserve(typename StorageT::size_type S) { Storage.reserve(S); }
 
-  void resize(typename StorageT::size_type s) { storage_.resize(s, nullVal_); }
+  void resize(typename StorageT::size_type S) { Storage.resize(S, NullVal); }
 
-  void clear() { storage_.clear(); }
+  void clear() { Storage.clear(); }
 
-  void grow(IndexT n) {
-    unsigned NewSize = toIndex_(n) + 1;
-    if (NewSize > storage_.size())
+  void grow(IndexT N) {
+    unsigned NewSize = ToIndex(N) + 1;
+    if (NewSize > Storage.size())
       resize(NewSize);
   }
 
-  bool inBounds(IndexT n) const { return toIndex_(n) < storage_.size(); }
+  bool inBounds(IndexT N) const { return ToIndex(N) < Storage.size(); }
 
-  typename StorageT::size_type size() const { return storage_.size(); }
+  typename StorageT::size_type size() const { return Storage.size(); }
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/ADT/RadixTree.h b/llvm/include/llvm/ADT/RadixTree.h
new file mode 100644
index 0000000000000..d3c44e4e6345c
--- /dev/null
+++ b/llvm/include/llvm/ADT/RadixTree.h
@@ -0,0 +1,350 @@
+//===-- llvm/ADT/RadixTree.h - Radix Tree implementation --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Radix Tree.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_RADIXTREE_H
+#define LLVM_ADT_RADIXTREE_H
+
+#include "llvm/ADT/ADL.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <list>
+#include <utility>
+
+namespace llvm {
+
+/// \brief A Radix Tree implementation.
+///
+/// A Radix Tree (also known as a compact prefix tree or radix trie) is a
+/// data structure that stores a dynamic set or associative array where keys
+/// are strings and values are associated with these keys. Unlike a regular
+/// trie, the edges of a radix tree can be labeled with sequences of characters
+/// as well as single characters. This makes radix trees more efficient for
+/// storing sparse data sets, where many nodes in a regular trie would have
+/// only one child.
+///
+/// This implementation supports arbitrary key types that can be iterated over
+/// (e.g., `std::string`, `std::vector<char>`, `ArrayRef<char>`). The key type
+/// must provide `begin()` and `end()` for iteration.
+///
+/// The tree stores `std::pair<const KeyType, T>` as its value type.
+///
+/// Example usage:
+/// \code
+///   llvm::RadixTree<StringRef, int> Tree;
+///   Tree.emplace("apple", 1);
+///   Tree.emplace("grapefruit", 2);
+///   Tree.emplace("grape", 3);
+///
+///   // Find prefixes
+///   for (const auto &[Key, Value] : Tree.find_prefixes("grapefruit juice")) {
+///     // pair will be {"grape", 3}
+///     // pair will be {"grapefruit", 2}
+///     llvm::outs() << Key << ": " << Value << "\n";
+///   }
+///
+///   // Iterate over all elements
+///   for (const auto &[Key, Value] : Tree)
+///     llvm::outs() << Key << ": " << Value << "\n";
+/// \endcode
+///
+/// \note
+/// The `RadixTree` takes ownership of the `KeyType` and `T` objects
+/// inserted into it. When an element is removed or the tree is destroyed,
+/// these objects will be destructed.
+/// However, if `KeyType` is a reference-like type, e.g., StringRef or range,
+/// the user must guarantee that the referenced data has a lifetime longer than
+/// the tree.
+template <typename KeyType, typename T> class RadixTree {
+public:
+  using key_type = KeyType;
+  using mapped_type = T;
+  using value_type = std::pair<const KeyType, mapped_type>;
+
+private:
+  using KeyConstIteratorType =
+      decltype(adl_begin(std::declval<const key_type &>()));
+  using KeyConstIteratorRangeType = iterator_range<KeyConstIteratorType>;
+  using KeyValueType =
+      remove_cvref_t<decltype(*adl_begin(std::declval<key_type &>()))>;
+  using ContainerType = std::list<value_type>;
+
+  /// Represents an internal node in the Radix Tree.
+  struct Node {
+    KeyConstIteratorRangeType Key{KeyConstIteratorType{},
+                                  KeyConstIteratorType{}};
+    std::vector<Node> Children;
+
+    /// An iterator to the value associated with this node.
+    ///
+    /// If this node does not have a value (i.e., it's an internal node that
+    /// only serves as a path to other values), this iterator will be equal
+    /// to default constructed `ContainerType::iterator()`.
+    typename ContainerType::iterator Value;
+
+    /// The first character of the Key. Used for fast child lookup.
+    KeyValueType KeyFront;
+
+    Node() = default;
+    Node(const KeyConstIteratorRangeType &Key)
+        : Key(Key), KeyFront(*Key.begin()) {
+      assert(!Key.empty());
+    }
+
+    Node(Node &&) = default;
+    Node &operator=(Node &&) = default;
+
+    Node(const Node &) = delete;
+    Node &operator=(const Node &) = delete;
+
+    const Node *findChild(const KeyConstIteratorRangeType &Key) const {
+      if (Key.empty())
+        return nullptr;
+      for (const Node &Child : Children) {
+        assert(!Child.Key.empty()); // Only root can be empty.
+        if (Child.KeyFront == *Key.begin())
+          return &Child;
+      }
+      return nullptr;
+    }
+
+    Node *findChild(const KeyConstIteratorRangeType &Query) {
+      const Node *This = this;
+      return const_cast<Node *>(This->findChild(Query));
+    }
+
+    size_t countNodes() const {
+      size_t R = 1;
+      for (const Node &C : Children)
+        R += C.countNodes();
+      return R;
+    }
+
+    ///
+    /// Splits the current node into two.
+    ///
+    /// This function is used when a new key needs to be inserted that shares
+    /// a common prefix with the current node's key, but then diverges.
+    /// The current `Key` is truncated to the common prefix, and a new child
+    /// node is created for the remainder of the original node's `Key`.
+    ///
+    /// \param SplitPoint An iterator pointing to the character in the current
+    ///                   `Key` where the split should occur.
+    void split(KeyConstIteratorType SplitPoint) {
+      Node Child(make_range(SplitPoint, Key.end()));
+      Key = make_range(Key.begin(), SplitPoint);
+
+      Children.swap(Child.Children);
+      std::swap(Value, Child.Value);
+
+      Children.emplace_back(std::move(Child));
+    }
+  };
+
+  /// Root always corresponds to the empty key, which is the shortest possible
+  /// prefix for everything.
+  Node Root;
+  ContainerType KeyValuePairs;
+
+  /// Finds or creates a new tail or leaf node corresponding to the `Key`.
+  Node &findOrCreate(KeyConstIteratorRangeType Key) {
+    Node *Curr = &Root;
+    if (Key.empty())
+      return *Curr;
+
+    for (;;) {
+      auto [I1, I2] = llvm::mismatch(Key, Curr->Key);
+      Key = make_range(I1, Key.end());
+
+      if (I2 != Curr->Key.end()) {
+        // Match is partial. Either query is too short, or there is mismatching
+        // character. Split either way, and put new node in between of the
+        // current and its children.
+        Curr->split(I2);
+
+        // Split was caused by mismatch, so `findChild` would fail.
+        break;
+      }
+
+      Node *Child = Curr->findChild(Key);
+      if (!Child)
+        break;
+
+      // Move to child with the same first character.
+      Curr = Child;
+    }
+
+    if (Key.empty()) {
+      // The current node completely matches the key, return it.
+      return *Curr;
+    }
+
+    // `Key` is a suffix of original `Key` unmatched by path from the `Root` to
+    // the `Curr`, and we have no candidate in the children to match more.
+    // Create a new one.
+    return Curr->Children.emplace_back(Key);
+  }
+
+  ///
+  /// An iterator for traversing prefixes search results.
+  ///
+  /// This iterator is used by `find_prefixes` to traverse the tree and find
+  /// elements that are prefixes to the given key. It's a forward iterator.
+  ///
+  /// \tparam MappedType The type of the value pointed to by the iterator.
+  ///                    This will be `value_type` for non-const iterators
+  ///                    and `const value_type` for const iterators.
+  template <typename MappedType>
+  class IteratorImpl
+      : public iterator_facade_base<IteratorImpl<MappedType>,
+                                    std::forward_iterator_tag, MappedType> {
+    const Node *Curr = nullptr;
+    KeyConstIteratorRangeType Query{KeyConstIteratorType{},
+                                    KeyConstIteratorType{}};
+
+    void findNextValid() {
+      while (Curr && Curr->Value == typename ContainerType::iterator())
+        advance();
+    }
+
+    void advance() {
+      assert(Curr);
+      if (Query.empty()) {
+        Curr = nullptr;
+        return;
+      }
+
+      Curr = Curr->findChild(Query);
+      if (!Curr) {
+        Curr = nullptr;
+        return;
+      }
+
+      auto [I1, I2] = llvm::mismatch(Query, Curr->Key);
+      if (I2 != Curr->Key.end()) {
+        Curr = nullptr;
+        return;
+      }
+      Query = make_range(I1, Query.end());
+    }
+
+    friend class RadixTree;
+    IteratorImpl(const Node *C, const KeyConstIteratorRangeType &Q)
+        : Curr(C), Query(Q) {
+      findNextValid();
+    }
+
+  public:
+    IteratorImpl() = default;
+
+    MappedType &operator*() const { return *Curr->Value; }
+
+    IteratorImpl &operator++() {
+      advance();
+      findNextValid();
+      return *this;
+    }
+
+    bool operator==(const IteratorImpl &Other) const {
+      return Curr == Other.Curr;
+    }
+  };
+
+public:
+  RadixTree() = default;
+  RadixTree(RadixTree &&) = default;
+  RadixTree &operator=(RadixTree &&) = default;
+
+  using prefix_iterator = IteratorImpl<value_type>;
+  using const_prefix_iterator = IteratorImpl<const value_type>;
+
+  using iterator = typename ContainerType::iterator;
+  using const_iterator = typename ContainerType::const_iterator;
+
+  /// Returns true if the tree is empty.
+  bool empty() const { return KeyValuePairs.empty(); }
+
+  /// Returns the number of elements in the tree.
+  size_t size() const { return KeyValuePairs.size(); }
+
+  /// Returns the number of nodes in the tree.
+  ///
+  /// This function counts all internal nodes in the tree. It can be useful for
+  /// understanding the memory footprint or complexity of the tree structure.
+  size_t countNodes() const { return Root.countNodes(); }
+
+  /// Returns an iterator to the first element.
+  iterator begin() { return KeyValuePairs.begin(); }
+  const_iterator begin() const { return KeyValuePairs.begin(); }
+
+  /// Returns an iterator to the end of the tree.
+  iterator end() { return KeyValuePairs.end(); }
+  const_iterator end() const { return KeyValuePairs.end(); }
+
+  /// Constructs and inserts a new element into the tree.
+  ///
+  /// This function constructs an element in place within the tree. If an
+  /// element with the same key already exists, the insertion fails and the
+  /// function returns an iterator to the existing element along with `false`.
+  /// Otherwise, the new element is inserted and the function returns an
+  /// iterator to the new element along with `true`.
+  ///
+  /// \param Key The key of the element to construct.
+  /// \param Args Arguments to forward to the constructor of the mapped_type.
+  /// \return A pair consisting of an iterator to the inserted element (or to
+  ///         the element that prevented insertion) and a boolean value
+  ///         indicating whether the insertion took place.
+  template <typename... Ts>
+  std::pair<iterator, bool> emplace(key_type &&Key, Ts &&...Args) {
+    // We want to make new `Node` to refer key in the container, not the one
+    // from the argument.
+    // FIXME: Determine that we need a new node, before expanding
+    // `KeyValuePairs`.
+    const value_type &NewValue = KeyValuePairs.emplace_front(
+        std::move(Key), T(std::forward<Ts>(Args)...));
+    Node &Node = findOrCreate(NewValue.first);
+    bool HasValue = Node.Value != typename ContainerType::iterator();
+    if (!HasValue)
+      Node.Value = KeyValuePairs.begin();
+    else
+      KeyValuePairs.pop_front();
+    return {Node.Value, !HasValue};
+  }
+
+  ///
+  /// Finds all elements whose keys are prefixes of the given `Key`.
+  ///
+  /// This function returns an iterator range over all elements in the tree
+  /// whose keys are prefixes of the provided `Key`. For example, if the tree
+  /// contains "abcde", "abc", "abcdefgh", and `Key` is "abcde", this function
+  /// would return iterators to "abcde" and "abc".
+  ///
+  /// \param Key The key to search for prefixes of.
+  /// \return An `iterator_range` of `const_prefix_iterator`s, allowing
+  ///         iteration over the found prefix elements.
+  /// \note The returned iterators reference the `Key` provided by the caller.
+  ///       The caller must ensure that `Key` remains valid for the lifetime
+  ///       of the iterators.
+  iterator_range<const_prefix_iterator>
+  find_prefixes(const key_type &Key) const {
+    return iterator_range<const_prefix_iterator>{
+        const_prefix_iterator(&Root, KeyConstIteratorRangeType(Key)),
+        const_prefix_iterator{}};
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_ADT_RADIXTREE_H
diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h
index 4a9598c734dbf..1889b90c14126 100644
--- a/llvm/include/llvm/ADT/STLForwardCompat.h
+++ b/llvm/include/llvm/ADT/STLForwardCompat.h
@@ -126,7 +126,7 @@ struct detector<std::void_t<Op<Args...>>, Op, Args...> {
 template <template <class...> class Op, class... Args>
 using is_detected = typename detail::detector<void, Op, Args...>::value_t;
 
-struct identity_cxx20 // NOLINT(readability-identifier-naming)
+struct identity // NOLINT(readability-identifier-naming)
 {
   using is_transparent = void;
 
diff --git a/llvm/include/llvm/ADT/SparseMultiSet.h b/llvm/include/llvm/ADT/SparseMultiSet.h
index 5e4e1703373a9..59de4cf6a553b 100644
--- a/llvm/include/llvm/ADT/SparseMultiSet.h
+++ b/llvm/include/llvm/ADT/SparseMultiSet.h
@@ -82,7 +82,7 @@ namespace llvm {
 /// @tparam SparseT     An unsigned integer type. See above.
 ///
 template <typename ValueT, typename KeyT = unsigned,
-          typename KeyFunctorT = identity_cxx20, typename SparseT = uint8_t>
+          typename KeyFunctorT = identity, typename SparseT = uint8_t>
 class SparseMultiSet {
   static_assert(std::is_unsigned_v<SparseT>,
                 "SparseT must be an unsigned integer type");
diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h
index 4697de097e7eb..41fd501911f2c 100644
--- a/llvm/include/llvm/ADT/SparseSet.h
+++ b/llvm/include/llvm/ADT/SparseSet.h
@@ -59,24 +59,20 @@ template <typename ValueT> struct SparseSetValTraits {
   }
 };
 
-/// SparseSetValFunctor - Helper class for selecting SparseSetValTraits. The
-/// generic implementation handles ValueT classes which either provide
-/// getSparseSetIndex() or specialize SparseSetValTraits<>.
+/// SparseSetValFunctor - Helper class for getting a value's index.
 ///
+/// In the generic case, this is done via SparseSetValTraits. When the value
+/// type is the same as the key type, the KeyFunctor is used directly.
 template <typename KeyT, typename ValueT, typename KeyFunctorT>
 struct SparseSetValFunctor {
   unsigned operator()(const ValueT &Val) const {
-    return SparseSetValTraits<ValueT>::getValIndex(Val);
+    if constexpr (std::is_same_v<KeyT, ValueT>)
+      return KeyFunctorT()(Val);
+    else
+      return SparseSetValTraits<ValueT>::getValIndex(Val);
   }
 };
 
-/// SparseSetValFunctor<KeyT, KeyT> - Helper class for the common case of
-/// identity key/value sets.
-template <typename KeyT, typename KeyFunctorT>
-struct SparseSetValFunctor<KeyT, KeyT, KeyFunctorT> {
-  unsigned operator()(const KeyT &Key) const { return KeyFunctorT()(Key); }
-};
-
 /// SparseSet - Fast set implementation for objects that can be identified by
 /// small unsigned keys.
 ///
@@ -117,7 +113,7 @@ struct SparseSetValFunctor<KeyT, KeyT, KeyFunctorT> {
 /// @tparam SparseT     An unsigned integer type. See above.
 ///
 template <typename ValueT, typename KeyT = unsigned,
-          typename KeyFunctorT = identity_cxx20, typename SparseT = uint8_t>
+          typename KeyFunctorT = identity, typename SparseT = uint8_t>
 class SparseSet {
   static_assert(std::is_unsigned_v<SparseT>,
                 "SparseT must be an unsigned integer type");
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 26d708060ed6d..ab0d7e334df44 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -89,7 +89,7 @@ namespace llvm {
   /// allocated once for the pass. It can be cleared in constant time and reused
   /// without any frees.
   using RegUnit2SUnitsMap =
-      SparseMultiSet<PhysRegSUOper, unsigned, identity_cxx20, uint16_t>;
+      SparseMultiSet<PhysRegSUOper, unsigned, identity, uint16_t>;
 
   /// Track local uses of virtual registers. These uses are gathered by the DAG
   /// builder and may be consulted by the scheduler to avoid iterating an entire
diff --git a/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
index 3a36863ac7e32..5a46207d402e5 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
@@ -34,7 +34,7 @@ class LLVM_ABI AppendingTypeTableBuilder : public TypeCollection {
 
 public:
   explicit AppendingTypeTableBuilder(BumpPtrAllocator &Storage);
-  ~AppendingTypeTableBuilder();
+  ~AppendingTypeTableBuilder() override;
 
   // TypeCollection overrides
   std::optional<TypeIndex> getFirst() override;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
index a587b3e2ed628..93e1c99a0110e 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
@@ -47,7 +47,7 @@ class LLVM_ABI GlobalTypeTableBuilder : public TypeCollection {
 
 public:
   explicit GlobalTypeTableBuilder(BumpPtrAllocator &Storage);
-  ~GlobalTypeTableBuilder();
+  ~GlobalTypeTableBuilder() override;
 
   // TypeCollection overrides
   std::optional<TypeIndex> getFirst() override;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
index 730278488176a..b9b2669c84085 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
@@ -43,7 +43,7 @@ class LLVM_ABI MergingTypeTableBuilder : public TypeCollection {
 
 public:
   explicit MergingTypeTableBuilder(BumpPtrAllocator &Storage);
-  ~MergingTypeTableBuilder();
+  ~MergingTypeTableBuilder() override;
 
   // TypeCollection overrides
   std::optional<TypeIndex> getFirst() override;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
index 07d599cf9b5c6..e3e9b2bb91e8a 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
@@ -30,7 +30,7 @@ class GsymReader;
 class GsymContext : public DIContext {
 public:
   GsymContext(std::unique_ptr<GsymReader> Reader);
-  ~GsymContext();
+  ~GsymContext() override;
 
   GsymContext(GsymContext &) = delete;
   GsymContext &operator=(GsymContext &) = delete;
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
index 0e7be45abfef4..34bace886a15f 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
@@ -143,7 +143,7 @@ class LLVM_ABI LVElement : public LVObject {
         VirtualityCode(0) {}
   LVElement(const LVElement &) = delete;
   LVElement &operator=(const LVElement &) = delete;
-  virtual ~LVElement() = default;
+  ~LVElement() override = default;
 
   LVSubclassID getSubclassID() const { return SubclassID; }
 
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLine.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLine.h
index 3618ce7b0ecda..dd17f76f966ef 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLine.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLine.h
@@ -53,7 +53,7 @@ class LLVM_ABI LVLine : public LVElement {
   }
   LVLine(const LVLine &) = delete;
   LVLine &operator=(const LVLine &) = delete;
-  virtual ~LVLine() = default;
+  ~LVLine() override = default;
 
   static bool classof(const LVElement *Element) {
     return Element->getSubclassID() == LVSubclassID::LV_LINE;
@@ -117,7 +117,7 @@ class LLVM_ABI LVLineDebug final : public LVLine {
   LVLineDebug() : LVLine() { setIsLineDebug(); }
   LVLineDebug(const LVLineDebug &) = delete;
   LVLineDebug &operator=(const LVLineDebug &) = delete;
-  ~LVLineDebug() = default;
+  ~LVLineDebug() override = default;
 
   // Additional line information. It includes attributes that describes
   // states in the machine instructions (basic block, end prologue, etc).
@@ -142,7 +142,7 @@ class LLVM_ABI LVLineAssembler final : public LVLine {
   LVLineAssembler() : LVLine() { setIsLineAssembler(); }
   LVLineAssembler(const LVLineAssembler &) = delete;
   LVLineAssembler &operator=(const LVLineAssembler &) = delete;
-  ~LVLineAssembler() = default;
+  ~LVLineAssembler() override = default;
 
   // Print blanks as the line number.
   std::string noLineAsString(bool ShowZero) const override {
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLocation.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLocation.h
index 0718e33f5645b..090af54cd1b85 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLocation.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLocation.h
@@ -100,7 +100,7 @@ class LLVM_ABI LVLocation : public LVObject {
   LVLocation() : LVObject() { setIsLocation(); }
   LVLocation(const LVLocation &) = delete;
   LVLocation &operator=(const LVLocation &) = delete;
-  virtual ~LVLocation() = default;
+  ~LVLocation() override = default;
 
   PROPERTY(Property, IsAddressRange);
   PROPERTY(Property, IsBaseClassOffset);
@@ -171,7 +171,7 @@ class LLVM_ABI LVLocationSymbol final : public LVLocation {
   LVLocationSymbol() : LVLocation() {}
   LVLocationSymbol(const LVLocationSymbol &) = delete;
   LVLocationSymbol &operator=(const LVLocationSymbol &) = delete;
-  ~LVLocationSymbol() = default;
+  ~LVLocationSymbol() override = default;
 
   void addObject(LVAddress LowPC, LVAddress HighPC, LVUnsigned SectionOffset,
                  uint64_t LocDescOffset) override;
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVRange.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVRange.h
index b5c833330e59e..4fa6a9aa6f047 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVRange.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVRange.h
@@ -62,7 +62,7 @@ class LLVM_ABI LVRange final : public LVObject {
   LVRange() : LVObject(), RangesTree(Allocator) {}
   LVRange(const LVRange &) = delete;
   LVRange &operator=(const LVRange &) = delete;
-  ~LVRange() = default;
+  ~LVRange() override = default;
 
   void addEntry(LVScope *Scope, LVAddress LowerAddress, LVAddress UpperAddress);
   void addEntry(LVScope *Scope);
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
index f4f3516769938..2e2619c55d58e 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
@@ -153,7 +153,7 @@ class LLVM_ABI LVScope : public LVElement {
   }
   LVScope(const LVScope &) = delete;
   LVScope &operator=(const LVScope &) = delete;
-  virtual ~LVScope() = default;
+  ~LVScope() override = default;
 
   static bool classof(const LVElement *Element) {
     return Element->getSubclassID() == LVSubclassID::LV_SCOPE;
@@ -349,7 +349,7 @@ class LLVM_ABI LVScopeAggregate final : public LVScope {
   LVScopeAggregate() : LVScope() {}
   LVScopeAggregate(const LVScopeAggregate &) = delete;
   LVScopeAggregate &operator=(const LVScopeAggregate &) = delete;
-  ~LVScopeAggregate() = default;
+  ~LVScopeAggregate() override = default;
 
   // DW_AT_specification, DW_AT_abstract_origin.
   LVScope *getReference() const override { return Reference; }
@@ -387,7 +387,7 @@ class LLVM_ABI LVScopeAlias final : public LVScope {
   }
   LVScopeAlias(const LVScopeAlias &) = delete;
   LVScopeAlias &operator=(const LVScopeAlias &) = delete;
-  ~LVScopeAlias() = default;
+  ~LVScopeAlias() override = default;
 
   // Returns true if current scope is logically equal to the given 'Scope'.
   bool equals(const LVScope *Scope) const override;
@@ -401,7 +401,7 @@ class LLVM_ABI LVScopeArray final : public LVScope {
   LVScopeArray() : LVScope() { setIsArray(); }
   LVScopeArray(const LVScopeArray &) = delete;
   LVScopeArray &operator=(const LVScopeArray &) = delete;
-  ~LVScopeArray() = default;
+  ~LVScopeArray() override = default;
 
   void resolveExtra() override;
 
@@ -513,7 +513,7 @@ class LLVM_ABI LVScopeCompileUnit final : public LVScope {
   }
   LVScopeCompileUnit(const LVScopeCompileUnit &) = delete;
   LVScopeCompileUnit &operator=(const LVScopeCompileUnit &) = delete;
-  ~LVScopeCompileUnit() = default;
+  ~LVScopeCompileUnit() override = default;
 
   LVScope *getCompileUnitParent() const override {
     return static_cast<LVScope *>(const_cast<LVScopeCompileUnit *>(this));
@@ -643,7 +643,7 @@ class LLVM_ABI LVScopeEnumeration final : public LVScope {
   LVScopeEnumeration() : LVScope() { setIsEnumeration(); }
   LVScopeEnumeration(const LVScopeEnumeration &) = delete;
   LVScopeEnumeration &operator=(const LVScopeEnumeration &) = delete;
-  ~LVScopeEnumeration() = default;
+  ~LVScopeEnumeration() override = default;
 
   // Returns true if current scope is logically equal to the given 'Scope'.
   bool equals(const LVScope *Scope) const override;
@@ -658,7 +658,7 @@ class LLVM_ABI LVScopeFormalPack final : public LVScope {
   LVScopeFormalPack() : LVScope() { setIsTemplatePack(); }
   LVScopeFormalPack(const LVScopeFormalPack &) = delete;
   LVScopeFormalPack &operator=(const LVScopeFormalPack &) = delete;
-  ~LVScopeFormalPack() = default;
+  ~LVScopeFormalPack() override = default;
 
   // Returns true if current scope is logically equal to the given 'Scope'.
   bool equals(const LVScope *Scope) const override;
@@ -676,7 +676,7 @@ class LLVM_ABI LVScopeFunction : public LVScope {
   LVScopeFunction() : LVScope() {}
   LVScopeFunction(const LVScopeFunction &) = delete;
   LVScopeFunction &operator=(const LVScopeFunction &) = delete;
-  virtual ~LVScopeFunction() = default;
+  ~LVScopeFunction() override = default;
 
   // DW_AT_specification, DW_AT_abstract_origin.
   LVScope *getReference() const override { return Reference; }
@@ -728,7 +728,7 @@ class LLVM_ABI LVScopeFunctionInlined final : public LVScopeFunction {
   LVScopeFunctionInlined() : LVScopeFunction() { setIsInlinedFunction(); }
   LVScopeFunctionInlined(const LVScopeFunctionInlined &) = delete;
   LVScopeFunctionInlined &operator=(const LVScopeFunctionInlined &) = delete;
-  ~LVScopeFunctionInlined() = default;
+  ~LVScopeFunctionInlined() override = default;
 
   uint32_t getDiscriminator() const override { return Discriminator; }
   void setDiscriminator(uint32_t Value) override {
@@ -767,7 +767,7 @@ class LLVM_ABI LVScopeFunctionType final : public LVScopeFunction {
   LVScopeFunctionType() : LVScopeFunction() { setIsFunctionType(); }
   LVScopeFunctionType(const LVScopeFunctionType &) = delete;
   LVScopeFunctionType &operator=(const LVScopeFunctionType &) = delete;
-  ~LVScopeFunctionType() = default;
+  ~LVScopeFunctionType() override = default;
 
   void resolveExtra() override;
 };
@@ -781,7 +781,7 @@ class LLVM_ABI LVScopeModule final : public LVScope {
   }
   LVScopeModule(const LVScopeModule &) = delete;
   LVScopeModule &operator=(const LVScopeModule &) = delete;
-  ~LVScopeModule() = default;
+  ~LVScopeModule() override = default;
 
   // Returns true if current scope is logically equal to the given 'Scope'.
   bool equals(const LVScope *Scope) const override;
@@ -797,7 +797,7 @@ class LLVM_ABI LVScopeNamespace final : public LVScope {
   LVScopeNamespace() : LVScope() { setIsNamespace(); }
   LVScopeNamespace(const LVScopeNamespace &) = delete;
   LVScopeNamespace &operator=(const LVScopeNamespace &) = delete;
-  ~LVScopeNamespace() = default;
+  ~LVScopeNamespace() override = default;
 
   // Access DW_AT_extension reference.
   LVScope *getReference() const override { return Reference; }
@@ -827,7 +827,7 @@ class LLVM_ABI LVScopeRoot final : public LVScope {
   LVScopeRoot() : LVScope() { setIsRoot(); }
   LVScopeRoot(const LVScopeRoot &) = delete;
   LVScopeRoot &operator=(const LVScopeRoot &) = delete;
-  ~LVScopeRoot() = default;
+  ~LVScopeRoot() override = default;
 
   StringRef getFileFormatName() const {
     return getStringPool().getString(FileFormatNameIndex);
@@ -859,7 +859,7 @@ class LLVM_ABI LVScopeTemplatePack final : public LVScope {
   LVScopeTemplatePack() : LVScope() { setIsTemplatePack(); }
   LVScopeTemplatePack(const LVScopeTemplatePack &) = delete;
   LVScopeTemplatePack &operator=(const LVScopeTemplatePack &) = delete;
-  ~LVScopeTemplatePack() = default;
+  ~LVScopeTemplatePack() override = default;
 
   // Returns true if current scope is logically equal to the given 'Scope'.
   bool equals(const LVScope *Scope) const override;
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSymbol.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSymbol.h
index ec9017e16b659..c5314fca084da 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSymbol.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSymbol.h
@@ -74,7 +74,7 @@ class LLVM_ABI LVSymbol final : public LVElement {
   }
   LVSymbol(const LVSymbol &) = delete;
   LVSymbol &operator=(const LVSymbol &) = delete;
-  ~LVSymbol() = default;
+  ~LVSymbol() override = default;
 
   static bool classof(const LVElement *Element) {
     return Element->getSubclassID() == LVSubclassID::LV_SYMBOL;
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVType.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVType.h
index 59e6a92be8ce6..af4abcf56c5ac 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVType.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVType.h
@@ -67,7 +67,7 @@ class LLVM_ABI LVType : public LVElement {
   LVType() : LVElement(LVSubclassID::LV_TYPE) { setIsType(); }
   LVType(const LVType &) = delete;
   LVType &operator=(const LVType &) = delete;
-  virtual ~LVType() = default;
+  ~LVType() override = default;
 
   static bool classof(const LVElement *Element) {
     return Element->getSubclassID() == LVSubclassID::LV_TYPE;
@@ -157,7 +157,7 @@ class LLVM_ABI LVTypeDefinition final : public LVType {
   }
   LVTypeDefinition(const LVTypeDefinition &) = delete;
   LVTypeDefinition &operator=(const LVTypeDefinition &) = delete;
-  ~LVTypeDefinition() = default;
+  ~LVTypeDefinition() override = default;
 
   // Return the underlying type for a type definition.
   LVElement *getUnderlyingType() override;
@@ -183,7 +183,7 @@ class LLVM_ABI LVTypeEnumerator final : public LVType {
   }
   LVTypeEnumerator(const LVTypeEnumerator &) = delete;
   LVTypeEnumerator &operator=(const LVTypeEnumerator &) = delete;
-  ~LVTypeEnumerator() = default;
+  ~LVTypeEnumerator() override = default;
 
   // Process the values for a DW_TAG_enumerator.
   StringRef getValue() const override {
@@ -206,7 +206,7 @@ class LLVM_ABI LVTypeImport final : public LVType {
   LVTypeImport() : LVType() { setIncludeInPrint(); }
   LVTypeImport(const LVTypeImport &) = delete;
   LVTypeImport &operator=(const LVTypeImport &) = delete;
-  ~LVTypeImport() = default;
+  ~LVTypeImport() override = default;
 
   // Returns true if current type is logically equal to the given 'Type'.
   bool equals(const LVType *Type) const override;
@@ -223,7 +223,7 @@ class LLVM_ABI LVTypeParam final : public LVType {
   LVTypeParam();
   LVTypeParam(const LVTypeParam &) = delete;
   LVTypeParam &operator=(const LVTypeParam &) = delete;
-  ~LVTypeParam() = default;
+  ~LVTypeParam() override = default;
 
   // Template parameter value.
   StringRef getValue() const override {
@@ -256,7 +256,7 @@ class LLVM_ABI LVTypeSubrange final : public LVType {
   }
   LVTypeSubrange(const LVTypeSubrange &) = delete;
   LVTypeSubrange &operator=(const LVTypeSubrange &) = delete;
-  ~LVTypeSubrange() = default;
+  ~LVTypeSubrange() override = default;
 
   int64_t getCount() const override {
     return getIsSubrangeCount() ? LowerBound : 0;
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h
index 2cf4a8ec6a37f..cc8dda2f82bee 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h
@@ -192,7 +192,7 @@ class LVBinaryReader : public LVReader {
       : LVReader(Filename, FileFormatName, W, BinaryType) {}
   LVBinaryReader(const LVBinaryReader &) = delete;
   LVBinaryReader &operator=(const LVBinaryReader &) = delete;
-  virtual ~LVBinaryReader() = default;
+  ~LVBinaryReader() override = default;
 
   void addInlineeLines(LVScope *Scope, LVLines &Lines) {
     CUInlineeLines.emplace(Scope, std::make_unique<LVLines>(std::move(Lines)));
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h
index 4dd7c967ddc17..9f6fd553221db 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h
@@ -200,7 +200,7 @@ class LVCodeViewReader final : public LVBinaryReader {
         Input(&Pdb), ExePath(ExePath), LogicalVisitor(this, W, Input) {}
   LVCodeViewReader(const LVCodeViewReader &) = delete;
   LVCodeViewReader &operator=(const LVCodeViewReader &) = delete;
-  ~LVCodeViewReader() = default;
+  ~LVCodeViewReader() override = default;
 
   void getLinkageName(const llvm::object::coff_section *CoffSection,
                       uint32_t RelocOffset, uint32_t Offset,
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h
index 2abc18b8dacac..1cf29147fe2a1 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h
@@ -123,7 +123,7 @@ class LVDWARFReader final : public LVBinaryReader {
         Obj(Obj) {}
   LVDWARFReader(const LVDWARFReader &) = delete;
   LVDWARFReader &operator=(const LVDWARFReader &) = delete;
-  ~LVDWARFReader() = default;
+  ~LVDWARFReader() override = default;
 
   LVAddress getCUBaseAddress() const { return CUBaseAddress; }
   void setCUBaseAddress(LVAddress Address) { CUBaseAddress = Address; }
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
index b21cd092939e6..c0b315131a911 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
@@ -20,7 +20,7 @@ namespace pdb {
 class LLVM_ABI PDBSymbolTypeBuiltin : public PDBSymbol {
   DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::BuiltinType)
 public:
-  ~PDBSymbolTypeBuiltin();
+  ~PDBSymbolTypeBuiltin() override;
   void dump(PDBSymDumper &Dumper) const override;
 
   FORWARD_SYMBOL_METHOD(getBuiltinType)
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
index 4a32113b460e1..6adaa8ae43895 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
@@ -34,7 +34,7 @@ using ExecutorAddrDiff = uint64_t;
 class ExecutorAddr {
 public:
   /// A wrap/unwrap function that leaves pointers unmodified.
-  using rawPtr = llvm::identity_cxx20;
+  using rawPtr = llvm::identity;
 
 #if __has_feature(ptrauth_calls)
   template <typename T> class PtrauthSignDefault {
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index d6fb2a0b0728f..6a0619dd8e0ce 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -211,7 +211,7 @@ class RegAllocFastImpl {
     unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); }
   };
 
-  using LiveRegMap = SparseSet<LiveReg, unsigned, identity_cxx20, uint16_t>;
+  using LiveRegMap = SparseSet<LiveReg, unsigned, identity, uint16_t>;
   /// This map contains entries for each virtual register that is currently
   /// available in a physical register.
   LiveRegMap LiveVirtRegs;
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 42e90f0e27517..d6fa65fe54b50 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 ///
-/// \file This file contains pases and utilities to convert a modern LLVM
+/// \file This file contains passes and utilities to convert a modern LLVM
 /// module into a module compatible with the LLVM 3.7-based DirectX Intermediate
 /// Language (DXIL).
 //===----------------------------------------------------------------------===//
@@ -16,7 +16,6 @@
 #include "DirectX.h"
 #include "DirectXIRPasses/PointerTypeAnalysis.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Analysis/DXILMetadataAnalysis.h"
 #include "llvm/Analysis/DXILResource.h"
@@ -27,7 +26,6 @@
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/VersionTuple.h"
 
 #define DEBUG_TYPE "dxil-prepare"
@@ -116,31 +114,6 @@ static void removeStringFunctionAttributes(Function &F,
   F.removeRetAttrs(DeadAttrs);
 }
 
-static void cleanModuleFlags(Module &M) {
-  NamedMDNode *MDFlags = M.getModuleFlagsMetadata();
-  if (!MDFlags)
-    return;
-
-  SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries;
-  M.getModuleFlagsMetadata(FlagEntries);
-  bool Updated = false;
-  for (auto &Flag : FlagEntries) {
-    // llvm 3.7 only supports behavior up to AppendUnique.
-    if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique)
-      continue;
-    Flag.Behavior = Module::ModFlagBehavior::Warning;
-    Updated = true;
-  }
-
-  if (!Updated)
-    return;
-
-  MDFlags->eraseFromParent();
-
-  for (auto &Flag : FlagEntries)
-    M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val);
-}
-
 class DXILPrepareModule : public ModulePass {
 
   static Value *maybeGenerateBitcast(IRBuilder<> &Builder,
@@ -202,15 +175,6 @@ class DXILPrepareModule : public ModulePass {
                          Builder.getPtrTy(PtrTy->getAddressSpace())));
   }
 
-  static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) {
-    return {M.getMDKindID("dx.nonuniform"),
-            M.getMDKindID("dx.controlflow.hints"),
-            M.getMDKindID("dx.precise"),
-            llvm::LLVMContext::MD_range,
-            llvm::LLVMContext::MD_alias_scope,
-            llvm::LLVMContext::MD_noalias};
-  }
-
 public:
   bool runOnModule(Module &M) override {
     PointerTypeMap PointerTypes = PointerTypeAnalysis::run(M);
@@ -224,10 +188,7 @@ class DXILPrepareModule : public ModulePass {
     const dxil::ModuleMetadataInfo MetadataInfo =
         getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata();
     VersionTuple ValVer = MetadataInfo.ValidatorVersion;
-    bool SkipValidation = ValVer.getMajor() == 0 && ValVer.getMinor() == 0;
-
-    // construct allowlist of valid metadata node kinds
-    std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M);
+    bool AllowExperimental = ValVer.getMajor() == 0 && ValVer.getMinor() == 0;
 
     for (auto &F : M.functions()) {
       F.removeFnAttrs(AttrMask);
@@ -235,7 +196,7 @@ class DXILPrepareModule : public ModulePass {
       // Only remove string attributes if we are not skipping validation.
       // This will reserve the experimental attributes when validation version
       // is 0.0 for experiment mode.
-      removeStringFunctionAttributes(F, SkipValidation);
+      removeStringFunctionAttributes(F, AllowExperimental);
       for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx)
         F.removeParamAttrs(Idx, AttrMask);
 
@@ -243,11 +204,17 @@ class DXILPrepareModule : public ModulePass {
         IRBuilder<> Builder(&BB);
         for (auto &I : make_early_inc_range(BB)) {
 
-          I.dropUnknownNonDebugMetadata(DXILCompatibleMDs);
+          if (auto *CB = dyn_cast<CallBase>(&I)) {
+            CB->removeFnAttrs(AttrMask);
+            CB->removeRetAttrs(AttrMask);
+            for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx)
+              CB->removeParamAttrs(Idx, AttrMask);
+            continue;
+          }
 
           // Emtting NoOp bitcast instructions allows the ValueEnumerator to be
           // unmodified as it reserves instruction IDs during contruction.
-          if (auto LI = dyn_cast<LoadInst>(&I)) {
+          if (auto *LI = dyn_cast<LoadInst>(&I)) {
             if (Value *NoOpBitcast = maybeGenerateBitcast(
                     Builder, PointerTypes, I, LI->getPointerOperand(),
                     LI->getType())) {
@@ -257,7 +224,7 @@ class DXILPrepareModule : public ModulePass {
             }
             continue;
           }
-          if (auto SI = dyn_cast<StoreInst>(&I)) {
+          if (auto *SI = dyn_cast<StoreInst>(&I)) {
             if (Value *NoOpBitcast = maybeGenerateBitcast(
                     Builder, PointerTypes, I, SI->getPointerOperand(),
                     SI->getValueOperand()->getType())) {
@@ -268,39 +235,16 @@ class DXILPrepareModule : public ModulePass {
             }
             continue;
           }
-          if (auto GEP = dyn_cast<GetElementPtrInst>(&I)) {
+          if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
             if (Value *NoOpBitcast = maybeGenerateBitcast(
                     Builder, PointerTypes, I, GEP->getPointerOperand(),
                     GEP->getSourceElementType()))
               GEP->setOperand(0, NoOpBitcast);
             continue;
           }
-          if (auto *CB = dyn_cast<CallBase>(&I)) {
-            CB->removeFnAttrs(AttrMask);
-            CB->removeRetAttrs(AttrMask);
-            for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx)
-              CB->removeParamAttrs(Idx, AttrMask);
-            continue;
-          }
         }
       }
     }
-    // Remove flags not for DXIL.
-    cleanModuleFlags(M);
-
-    // dx.rootsignatures will have been parsed from its metadata form as its
-    // binary form as part of the RootSignatureAnalysisWrapper, so safely
-    // remove it as it is not recognized in DXIL
-    if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures"))
-      RootSignature->eraseFromParent();
-
-    // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and
-    // causes all tests using the DXIL Validator to fail.
-    //
-    // This is a temporary fix and should be replaced with a whitelist once
-    // we have determined all metadata that the DXIL Validator allows
-    if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa"))
-      ErrNo->eraseFromParent();
 
     return true;
   }
@@ -308,11 +252,11 @@ class DXILPrepareModule : public ModulePass {
   DXILPrepareModule() : ModulePass(ID) {}
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<DXILMetadataAnalysisWrapperPass>();
-    AU.addRequired<RootSignatureAnalysisWrapper>();
-    AU.addPreserved<RootSignatureAnalysisWrapper>();
-    AU.addPreserved<ShaderFlagsAnalysisWrapper>();
+
     AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
     AU.addPreserved<DXILResourceWrapperPass>();
+    AU.addPreserved<RootSignatureAnalysisWrapper>();
+    AU.addPreserved<ShaderFlagsAnalysisWrapper>();
   }
   static char ID; // Pass identification.
 };
@@ -323,7 +267,6 @@ char DXILPrepareModule::ID = 0;
 INITIALIZE_PASS_BEGIN(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module",
                       false, false)
 INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper)
 INITIALIZE_PASS_END(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false,
                     false)
 
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
index 9eebcc9b13063..1e4797bbd05aa 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
@@ -7,8 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "DXILTranslateMetadata.h"
+#include "DXILRootSignature.h"
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/DXILMetadataAnalysis.h"
@@ -204,9 +206,9 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags,
   return MDNode::get(Ctx, MDVals);
 }
 
-MDTuple *constructEntryMetadata(const Function *EntryFn, MDTuple *Signatures,
-                                MDNode *Resources, MDTuple *Properties,
-                                LLVMContext &Ctx) {
+static MDTuple *constructEntryMetadata(const Function *EntryFn,
+                                       MDTuple *Signatures, MDNode *Resources,
+                                       MDTuple *Properties, LLVMContext &Ctx) {
   // Each entry point metadata record specifies:
   //  * reference to the entry point function global symbol
   //  * unmangled name
@@ -290,42 +292,82 @@ static MDTuple *emitTopLevelLibraryNode(Module &M, MDNode *RMD,
   return constructEntryMetadata(nullptr, nullptr, RMD, Properties, Ctx);
 }
 
-// TODO: We might need to refactor this to be more generic,
-// in case we need more metadata to be replaced.
-static void translateBranchMetadata(Module &M) {
-  for (Function &F : M) {
-    for (BasicBlock &BB : F) {
-      Instruction *BBTerminatorInst = BB.getTerminator();
+static void translateBranchMetadata(Module &M, Instruction *BBTerminatorInst) {
+  MDNode *HlslControlFlowMD =
+      BBTerminatorInst->getMetadata("hlsl.controlflow.hint");
+
+  if (!HlslControlFlowMD)
+    return;
 
-      MDNode *HlslControlFlowMD =
-          BBTerminatorInst->getMetadata("hlsl.controlflow.hint");
+  assert(HlslControlFlowMD->getNumOperands() == 2 &&
+         "invalid operands for hlsl.controlflow.hint");
 
-      if (!HlslControlFlowMD)
-        continue;
+  MDBuilder MDHelper(M.getContext());
 
-      assert(HlslControlFlowMD->getNumOperands() == 2 &&
-             "invalid operands for hlsl.controlflow.hint");
+  llvm::Metadata *HintsStr = MDHelper.createString("dx.controlflow.hints");
+  llvm::Metadata *HintsValue = MDHelper.createConstant(
+      mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1)));
 
-      MDBuilder MDHelper(M.getContext());
-      ConstantInt *Op1 =
-          mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1));
+  MDNode *MDNode = llvm::MDNode::get(M.getContext(), {HintsStr, HintsValue});
 
-      SmallVector<llvm::Metadata *, 2> Vals(
-          ArrayRef<Metadata *>{MDHelper.createString("dx.controlflow.hints"),
-                               MDHelper.createConstant(Op1)});
+  BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode);
+  BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr);
+}
+
+static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) {
+  return {
+      M.getMDKindID("dx.nonuniform"),    M.getMDKindID("dx.controlflow.hints"),
+      M.getMDKindID("dx.precise"),       llvm::LLVMContext::MD_range,
+      llvm::LLVMContext::MD_alias_scope, llvm::LLVMContext::MD_noalias};
+}
 
-      MDNode *MDNode = llvm::MDNode::get(M.getContext(), Vals);
+static void translateInstructionMetadata(Module &M) {
+  // construct allowlist of valid metadata node kinds
+  std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M);
 
-      BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode);
-      BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr);
+  for (Function &F : M) {
+    for (BasicBlock &BB : F) {
+      // This needs to be done first so that "hlsl.controlflow.hints" isn't
+      // removed in the whitelist below
+      if (auto *I = BB.getTerminator())
+        translateBranchMetadata(M, I);
+
+      for (auto &I : make_early_inc_range(BB)) {
+        I.dropUnknownNonDebugMetadata(DXILCompatibleMDs);
+      }
     }
   }
 }
 
-static void translateMetadata(Module &M, DXILResourceMap &DRM,
-                              DXILResourceTypeMap &DRTM,
-                              const ModuleShaderFlags &ShaderFlags,
-                              const ModuleMetadataInfo &MMDI) {
+static void cleanModuleFlags(Module &M) {
+  NamedMDNode *MDFlags = M.getModuleFlagsMetadata();
+  if (!MDFlags)
+    return;
+
+  SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries;
+  M.getModuleFlagsMetadata(FlagEntries);
+  bool Updated = false;
+  for (auto &Flag : FlagEntries) {
+    // llvm 3.7 only supports behavior up to AppendUnique.
+    if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique)
+      continue;
+    Flag.Behavior = Module::ModFlagBehavior::Warning;
+    Updated = true;
+  }
+
+  if (!Updated)
+    return;
+
+  MDFlags->eraseFromParent();
+
+  for (auto &Flag : FlagEntries)
+    M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val);
+}
+
+static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM,
+                                    DXILResourceTypeMap &DRTM,
+                                    const ModuleShaderFlags &ShaderFlags,
+                                    const ModuleMetadataInfo &MMDI) {
   LLVMContext &Ctx = M.getContext();
   IRBuilder<> IRB(Ctx);
   SmallVector<MDNode *> EntryFnMDNodes;
@@ -381,6 +423,22 @@ static void translateMetadata(Module &M, DXILResourceMap &DRM,
       M.getOrInsertNamedMetadata("dx.entryPoints");
   for (auto *Entry : EntryFnMDNodes)
     EntryPointsNamedMD->addOperand(Entry);
+
+  cleanModuleFlags(M);
+
+  // dx.rootsignatures will have been parsed from its metadata form as its
+  // binary form as part of the RootSignatureAnalysisWrapper, so safely
+  // remove it as it is not recognized in DXIL
+  if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures"))
+    RootSignature->eraseFromParent();
+
+  // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and
+  // causes all tests using the DXIL Validator to fail.
+  //
+  // This is a temporary fix and should be replaced with a allowlist once
+  // we have determined all metadata that the DXIL Validator allows
+  if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa"))
+    ErrNo->eraseFromParent();
 }
 
 PreservedAnalyses DXILTranslateMetadata::run(Module &M,
@@ -390,8 +448,8 @@ PreservedAnalyses DXILTranslateMetadata::run(Module &M,
   const ModuleShaderFlags &ShaderFlags = MAM.getResult<ShaderFlagsAnalysis>(M);
   const dxil::ModuleMetadataInfo MMDI = MAM.getResult<DXILMetadataAnalysis>(M);
 
-  translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
-  translateBranchMetadata(M);
+  translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
+  translateInstructionMetadata(M);
 
   return PreservedAnalyses::all();
 }
@@ -409,10 +467,13 @@ class DXILTranslateMetadataLegacy : public ModulePass {
     AU.addRequired<DXILResourceWrapperPass>();
     AU.addRequired<ShaderFlagsAnalysisWrapper>();
     AU.addRequired<DXILMetadataAnalysisWrapperPass>();
-    AU.addPreserved<DXILResourceWrapperPass>();
+    AU.addRequired<RootSignatureAnalysisWrapper>();
+
     AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
-    AU.addPreserved<ShaderFlagsAnalysisWrapper>();
     AU.addPreserved<DXILResourceBindingWrapperPass>();
+    AU.addPreserved<DXILResourceWrapperPass>();
+    AU.addPreserved<RootSignatureAnalysisWrapper>();
+    AU.addPreserved<ShaderFlagsAnalysisWrapper>();
   }
 
   bool runOnModule(Module &M) override {
@@ -425,8 +486,8 @@ class DXILTranslateMetadataLegacy : public ModulePass {
     dxil::ModuleMetadataInfo MMDI =
         getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata();
 
-    translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
-    translateBranchMetadata(M);
+    translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
+    translateInstructionMetadata(M);
     return true;
   }
 };
@@ -443,6 +504,7 @@ INITIALIZE_PASS_BEGIN(DXILTranslateMetadataLegacy, "dxil-translate-metadata",
                       "DXIL Translate Metadata", false, false)
 INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper)
+INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper)
 INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass)
 INITIALIZE_PASS_END(DXILTranslateMetadataLegacy, "dxil-translate-metadata",
                     "DXIL Translate Metadata", false, false)
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h
index f3f5eb1901406..4c1ffac1781e6 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h
@@ -13,7 +13,8 @@
 
 namespace llvm {
 
-/// A pass that transforms DXIL Intrinsics that don't have DXIL opCodes
+/// A pass that transforms LLVM Metadata in the module to it's DXIL equivalent,
+/// then emits all recognized DXIL Metadata
 class DXILTranslateMetadata : public PassInfoMixin<DXILTranslateMetadata> {
 public:
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b6cbecb6133f4..10b03bbcd33dc 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4);
 static const Align kShadowTLSAlignment = Align(8);
 
 // These constants must be kept in sync with the ones in msan.h.
+// TODO: increase size to match SVE/SVE2/SME/SME2 limits
 static const unsigned kParamTLSSize = 800;
 static const unsigned kRetvalTLSSize = 800;
 
@@ -1544,6 +1545,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
   }
 
+  static bool isAArch64SVCount(Type *Ty) {
+    if (TargetExtType *TTy = dyn_cast<TargetExtType>(Ty))
+      return TTy->getName() == "aarch64.svcount";
+    return false;
+  }
+
+  // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka
+  // 'target("aarch64.svcount")', but not e.g., <vscale x 4 x i32>.
+  static bool isScalableNonVectorType(Type *Ty) {
+    if (!isAArch64SVCount(Ty))
+      LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty
+                        << "\n");
+
+    return Ty->isScalableTy() && !isa<VectorType>(Ty);
+  }
+
   void materializeChecks() {
 #ifndef NDEBUG
     // For assert below.
@@ -1672,6 +1689,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
       return Res;
     }
+    if (isScalableNonVectorType(OrigTy)) {
+      LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy
+                        << "\n");
+      return OrigTy;
+    }
+
     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
     return IntegerType::get(*MS.C, TypeSize);
   }
@@ -2185,8 +2208,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                         << *OrigIns << "\n");
       return;
     }
-#ifndef NDEBUG
+
     Type *ShadowTy = Shadow->getType();
+    if (isScalableNonVectorType(ShadowTy)) {
+      LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow
+                        << " before " << *OrigIns << "\n");
+      return;
+    }
+#ifndef NDEBUG
     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
            "Can only insert checks for integer, vector, and aggregate shadow "
@@ -6972,6 +7001,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       // an extra "select". This results in much more compact IR.
       // Sa = select Sb, poisoned, (select b, Sc, Sd)
       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
+    } else if (isScalableNonVectorType(I.getType())) {
+      // This is intended to handle target("aarch64.svcount"), which can't be
+      // handled in the else branch because of incompatibility with CreateXor
+      // ("The supported LLVM operations on this type are limited to load,
+      // store, phi, select and alloca instructions").
+
+      // TODO: this currently underapproximates. Use Arm SVE EOR in the else
+      //       branch as needed instead.
+      Sa1 = getCleanShadow(getShadowTy(I.getType()));
     } else {
       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
       // If Sb (condition is poisoned), look for bits in c and d that are equal
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a1ad2dbcba8be..2591df8943752 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4172,11 +4172,6 @@ class VPlan {
   /// definitions are VPValues that hold a pointer to their underlying IR.
   SmallVector<VPValue *, 16> VPLiveIns;
 
-  /// Mapping from SCEVs to the VPValues representing their expansions.
-  /// NOTE: This mapping is temporary and will be removed once all users have
-  /// been modeled in VPlan directly.
-  DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
-
   /// Blocks allocated and owned by the VPlan. They will be deleted once the
   /// VPlan is destroyed.
   SmallVector<VPBlockBase *> CreatedBlocks;
@@ -4424,15 +4419,6 @@ class VPlan {
   LLVM_DUMP_METHOD void dump() const;
 #endif
 
-  VPValue *getSCEVExpansion(const SCEV *S) const {
-    return SCEVToExpansion.lookup(S);
-  }
-
-  void addSCEVExpansion(const SCEV *S, VPValue *V) {
-    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
-    SCEVToExpansion[S] = V;
-  }
-
   /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
   /// recipes to refer to the clones, and return it.
   VPlan *duplicate();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 06c3d75e5708c..fe66f133d7c03 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -32,8 +32,6 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
 }
 
 VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
-  if (auto *Expanded = Plan.getSCEVExpansion(Expr))
-    return Expanded;
   VPValue *Expanded = nullptr;
   if (auto *E = dyn_cast<SCEVConstant>(Expr))
     Expanded = Plan.getOrAddLiveIn(E->getValue());
@@ -50,7 +48,6 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
       Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
     }
   }
-  Plan.addSCEVExpansion(Expr, Expanded);
   return Expanded;
 }
 
diff --git a/llvm/test/CodeGen/DirectX/legalize-module-flags.ll b/llvm/test/CodeGen/DirectX/legalize-module-flags.ll
index 6c29deabc2aa3..044bd91866e61 100644
--- a/llvm/test/CodeGen/DirectX/legalize-module-flags.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-module-flags.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-prepare -mtriple=dxil-unknown-shadermodel6.0-compute %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata -mtriple=dxil-unknown-shadermodel6.0-compute %s | FileCheck %s
 
 ; Make sure behavior flag > 6 is fixed.
 ; CHECK: !{i32 2, !"frame-pointer", i32 2}
diff --git a/llvm/test/CodeGen/DirectX/legalize-module-flags2.ll b/llvm/test/CodeGen/DirectX/legalize-module-flags2.ll
index 244ec8d54e131..b8a60a8b6e662 100644
--- a/llvm/test/CodeGen/DirectX/legalize-module-flags2.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-module-flags2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-prepare -mtriple=dxil-unknown-shadermodel6.0-library %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata -mtriple=dxil-unknown-shadermodel6.0-library %s | FileCheck %s
 
 ; CHECK: define void @main()
 ; Make sure behavior flag > 6 is fixed.
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 13c25396ea98f..d265826cd2469 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -40,8 +40,8 @@
 ; CHECK-NEXT:   DXIL Resources Analysis
 ; CHECK-NEXT:   DXIL Module Metadata analysis
 ; CHECK-NEXT:   DXIL Shader Flag Analysis
-; CHECK-NEXT:   DXIL Translate Metadata
 ; CHECK-NEXT:   DXIL Root Signature Analysis
+; CHECK-NEXT:   DXIL Translate Metadata
 ; CHECK-NEXT:   DXIL Post Optimization Validation
 ; CHECK-NEXT:   DXIL Op Lowering
 ; CHECK-NEXT:   DXIL Prepare Module
diff --git a/llvm/test/CodeGen/DirectX/metadata-stripping.ll b/llvm/test/CodeGen/DirectX/metadata-stripping.ll
index eb939babd7d62..531ab6c334d24 100644
--- a/llvm/test/CodeGen/DirectX/metadata-stripping.ll
+++ b/llvm/test/CodeGen/DirectX/metadata-stripping.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --dxil-prepare %s | FileCheck %s
+; RUN: opt -S --dxil-translate-metadata %s | FileCheck %s
 
 ; Test that only metadata nodes that are valid in DXIL are allowed through
 
diff --git a/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll
index 9190d0305d63f..2c4140dae9e3e 100644
--- a/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll
+++ b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll
@@ -1,6 +1,6 @@
-; RUN: opt -S -dxil-prepare < %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
 
-; Ensures that dxil-prepare will remove the llvm.errno.tbaa metadata
+; Ensures that dxil-translate-metadata will remove the llvm.errno.tbaa metadata
 
 target triple = "dxil-unknown-shadermodel6.0-compute"
 
@@ -10,7 +10,6 @@ entry:
 }
 
 ; CHECK-NOT: !llvm.errno.tbaa
-; CHECK-NOT: {{^!}}
 
 !llvm.errno.tbaa = !{!0}
 
diff --git a/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll b/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll
index 3ac617ae871fc..daf20bf636b00 100644
--- a/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll
+++ b/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll
@@ -1,6 +1,6 @@
-; RUN: opt -S -dxil-prepare < %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
 
-; Ensures that dxil-prepare will remove the dx.rootsignatures metadata
+; Ensures that dxil-translate-metadata  will remove the dx.rootsignatures metadata
 
 target triple = "dxil-unknown-shadermodel6.0-compute"
 
@@ -10,7 +10,6 @@ entry:
 }
 
 ; CHECK-NOT: !dx.rootsignatures
-; CHECK-NOT: {{^!}}
 
 !dx.rootsignatures = !{!2} ; list of function/root signature pairs
 !2 = !{ ptr @main, !3, i32 2 } ; function, root signature
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
index 1c869bd41b931..e7491e985fa26 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
@@ -1,14 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
 
 ; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
-; Manually minimized to show MSan leads to a compiler crash
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android9001"
 
 define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind {
+; CHECK-LABEL: @test_return_arg1(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[ARG1:%.*]]
+;
   ret target("aarch64.svcount") %arg1
 }
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
index 00cf3204464d0..e1ea9e68aefc3 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
@@ -1,7 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
 
 ; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
 
@@ -12,16 +10,49 @@ target triple = "aarch64--linux-android9001"
 ; Test simple loads, stores and return.
 ;
 define target("aarch64.svcount") @test_load(ptr %ptr) nounwind {
+; CHECK-LABEL: @test_load(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = load target("aarch64.svcount"), ptr [[PTR:%.*]], align 2
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[RES]]
+;
   %res = load target("aarch64.svcount"), ptr %ptr
   ret target("aarch64.svcount") %res
 }
 
 define void @test_store(ptr %ptr, target("aarch64.svcount") %val) nounwind {
+; CHECK-LABEL: @test_store(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr [[TMP3]], align 2
+; CHECK-NEXT:    store target("aarch64.svcount") [[VAL:%.*]], ptr [[PTR]], align 2
+; CHECK-NEXT:    ret void
+;
   store target("aarch64.svcount") %val, ptr %ptr
   ret void
 }
 
 define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcount") %val) nounwind {
+; CHECK-LABEL: @test_alloca_store_reload(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[PTR:%.*]] = alloca target("aarch64.svcount"), align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[TMP5]], i8 0, i64 [[TMP2]], i1 false)
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr [[TMP8]], align 2
+; CHECK-NEXT:    store target("aarch64.svcount") [[VAL:%.*]], ptr [[PTR]], align 2
+; CHECK-NEXT:    [[RES:%.*]] = load target("aarch64.svcount"), ptr [[PTR]], align 2
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[RES]]
+;
   %ptr = alloca target("aarch64.svcount"), align 1
   store target("aarch64.svcount") %val, ptr %ptr
   %res = load target("aarch64.svcount"), ptr %ptr
@@ -33,10 +64,20 @@ define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcou
 ;
 
 define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind {
+; CHECK-LABEL: @test_return_arg1(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[ARG1:%.*]]
+;
   ret target("aarch64.svcount") %arg1
 }
 
 define target("aarch64.svcount") @test_return_arg4(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4) nounwind {
+; CHECK-LABEL: @test_return_arg4(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[ARG4:%.*]]
+;
   ret target("aarch64.svcount") %arg4
 }
 
@@ -46,22 +87,58 @@ define target("aarch64.svcount") @test_return_arg4(target("aarch64.svcount") %ar
 
 declare void @take_svcount_1(target("aarch64.svcount") %arg)
 define void @test_pass_1arg(target("aarch64.svcount") %arg) nounwind {
+; CHECK-LABEL: @test_pass_1arg(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    call void @take_svcount_1(target("aarch64.svcount") [[ARG:%.*]])
+; CHECK-NEXT:    ret void
+;
   call void @take_svcount_1(target("aarch64.svcount") %arg)
   ret void
 }
 
 declare void @take_svcount_5(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4)
 define void @test_pass_5args(target("aarch64.svcount") %arg) nounwind {
+; CHECK-LABEL: @test_pass_5args(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    call void @take_svcount_5(target("aarch64.svcount") [[ARG:%.*]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]])
+; CHECK-NEXT:    ret void
+;
   call void @take_svcount_5(target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg)
   ret void
 }
 
 define target("aarch64.svcount") @test_sel(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i1 %cmp) sanitize_memory {
+; CHECK-LABEL: @test_sel(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i1, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[CMP:%.*]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer
+; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP1]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") [[TMP2]]
+; CHECK-NEXT:    [[X_Y:%.*]] = select i1 [[CMP]], target("aarch64.svcount") [[X:%.*]], target("aarch64.svcount") [[Y:%.*]]
+; CHECK-NEXT:    store target("aarch64.svcount") [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[X_Y]]
+;
   %x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y
   ret target("aarch64.svcount") %x.y
 }
 
 define target("aarch64.svcount") @test_sel_cc(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i32 %k) sanitize_memory {
+; CHECK-LABEL: @test_sel_cc(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[K:%.*]], -2147483648
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i32 [[TMP4]], -2147483606
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], -2147483606
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[K]], 42
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[CMP]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer
+; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP8]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") [[TMP9]]
+; CHECK-NEXT:    [[X_Y:%.*]] = select i1 [[CMP]], target("aarch64.svcount") [[X:%.*]], target("aarch64.svcount") [[Y:%.*]]
+; CHECK-NEXT:    store target("aarch64.svcount") [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[X_Y]]
+;
   %cmp = icmp sgt i32 %k, 42
   %x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y
   ret target("aarch64.svcount") %x.y
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
index 3f43efa233621..3ae73c5719c3a 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
@@ -1,7 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s | FileCheck %s
 
 ; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
 ; Manually reduced to show MSan leads to a compiler crash
@@ -10,6 +8,19 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android9001"
 
 define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32_tuple(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]])
+; CHECK-NEXT:    ret void
+;
   %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
   %2 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %1, ptr %ptr)
   ret void
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
index cd04373c11d20..8d00b930abf95 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
@@ -1,7 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s | FileCheck %s
 
 ; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
 
@@ -9,6 +7,27 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android9001"
 
 define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,  <vscale x 4 x i32> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x2_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZM]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice,
   <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
   <vscale x 4 x i32> %zm)
@@ -20,6 +39,27 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
 }
 
 define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,  <vscale x 2 x i64> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x2_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZM]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice,
   <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
   <vscale x 2 x i64> %zm)
@@ -32,6 +72,27 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
 
 
 define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x4_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZN2:%.*]], <vscale x 4 x i32> [[ZN3:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZN2]], <vscale x 4 x i32> [[ZN3]], <vscale x 4 x i32> [[ZM]])
+; CHECK-NEXT:    ret void
+;
   <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
   <vscale x 4 x i32> %zm) sanitize_memory {
   call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice,
@@ -47,6 +108,27 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
 }
 
 define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x4_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZN2:%.*]], <vscale x 2 x i64> [[ZN3:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZN2]], <vscale x 2 x i64> [[ZN3]], <vscale x 2 x i64> [[ZM]])
+; CHECK-NEXT:    ret void
+;
   <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
   <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
   <vscale x 2 x i64> %zm) sanitize_memory {
@@ -64,6 +146,27 @@ define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
 
 
 define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x2_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZM1:%.*]], <vscale x 4 x i32> [[ZM2:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZM1]], <vscale x 4 x i32> [[ZM2]])
+; CHECK-NEXT:    ret void
+;
   <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) sanitize_memory {
   call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice,
   <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
@@ -77,6 +180,27 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
 
 
 define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x2_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZM1:%.*]], <vscale x 2 x i64> [[ZM2:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZM1]], <vscale x 2 x i64> [[ZM2]])
+; CHECK-NEXT:    ret void
+;
   <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) sanitize_memory {
   call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice,
   <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
@@ -91,6 +215,27 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
 
 
 define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x4_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZN2:%.*]], <vscale x 4 x i32> [[ZN3:%.*]], <vscale x 4 x i32> [[ZM0:%.*]], <vscale x 4 x i32> [[ZM1:%.*]], <vscale x 4 x i32> [[ZM2:%.*]], <vscale x 4 x i32> [[ZM3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZN2]], <vscale x 4 x i32> [[ZN3]], <vscale x 4 x i32> [[ZM0]], <vscale x 4 x i32> [[ZM1]], <vscale x 4 x i32> [[ZM2]], <vscale x 4 x i32> [[ZM3]])
+; CHECK-NEXT:    ret void
+;
   <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
   <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1,
   <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) sanitize_memory {
@@ -109,6 +254,27 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
 }
 
 define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x4_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZN2:%.*]], <vscale x 2 x i64> [[ZN3:%.*]], <vscale x 2 x i64> [[ZM0:%.*]], <vscale x 2 x i64> [[ZM1:%.*]], <vscale x 2 x i64> [[ZM2:%.*]], <vscale x 2 x i64> [[ZM3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZN2]], <vscale x 2 x i64> [[ZN3]], <vscale x 2 x i64> [[ZM0]], <vscale x 2 x i64> [[ZM1]], <vscale x 2 x i64> [[ZM2]], <vscale x 2 x i64> [[ZM3]])
+; CHECK-NEXT:    ret void
+;
   <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
   <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1,
   <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) sanitize_memory {
@@ -127,6 +293,27 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
 }
 
 define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice,<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
   %slice.7 = add i32 %slice, 7
   call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
@@ -134,6 +321,27 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 }
 
 define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
   %slice.7 = add i32 %slice, 7
   call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
@@ -141,6 +349,27 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 }
 
 define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE:%.*]], <vscale x 4 x float> [[ZN0:%.*]], <vscale x 4 x float> [[ZN1:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_7]], <vscale x 4 x float> [[ZN0]], <vscale x 4 x float> [[ZN1]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice,
   <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
   %slice.7 = add i32 %slice, 7
@@ -150,6 +379,27 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0
 }
 
 define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE:%.*]], <vscale x 2 x double> [[ZN0:%.*]], <vscale x 2 x double> [[ZN1:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_7]], <vscale x 2 x double> [[ZN0]], <vscale x 2 x double> [[ZN1]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice,
   <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1)
   %slice.7 = add i32 %slice, 7
@@ -159,6 +409,36 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn
 }
 
 define void @multi_vector_add_za_vg1x2_f64_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_f64_tuple(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]], 1
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX2]])
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP10]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP10]], 1
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> [[TMP11]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP12]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
   %1 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %ptr)
@@ -175,6 +455,27 @@ entry:
 
 
 define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZN2:%.*]], <vscale x 4 x i32> [[ZN3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZN2]], <vscale x 4 x i32> [[ZN3]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice,
   <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
   <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
@@ -186,6 +487,27 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 }
 
 define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZN2:%.*]], <vscale x 2 x i64> [[ZN3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZN2]], <vscale x 2 x i64> [[ZN3]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice,
   <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
   <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3)
@@ -197,6 +519,27 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 }
 
 define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE:%.*]], <vscale x 4 x float> [[ZN0:%.*]], <vscale x 4 x float> [[ZN1:%.*]], <vscale x 4 x float> [[ZN2:%.*]], <vscale x 4 x float> [[ZN3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_7]], <vscale x 4 x float> [[ZN0]], <vscale x 4 x float> [[ZN1]], <vscale x 4 x float> [[ZN2]], <vscale x 4 x float> [[ZN3]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice,
   <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
   <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
@@ -208,6 +551,73 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0
 }
 
 define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32_tuple(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 3
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK:       10:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP12:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX2]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 1
+; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 2
+; CHECK-NEXT:    [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 3
+; CHECK-NEXT:    [[TMP17:%.*]] = shl i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP18:%.*]] = or i64 [[TMP17]], 0
+; CHECK-NEXT:    [[MUL3:%.*]] = shl i64 [[STRIDE]], 1
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[TMP0]], [[TMP18]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[MUL3]]
+; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[_MSPROP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP4]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK:       19:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       20:
+; CHECK-NEXT:    [[TMP21:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX4]])
+; CHECK-NEXT:    [[TMP22:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 0
+; CHECK-NEXT:    [[TMP23:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 1
+; CHECK-NEXT:    [[TMP24:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 2
+; CHECK-NEXT:    [[TMP25:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 3
+; CHECK-NEXT:    [[MSPROP_MUL_CST:%.*]] = mul i64 [[TMP1]], 1
+; CHECK-NEXT:    [[MUL5:%.*]] = mul i64 [[STRIDE]], 3
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = or i64 [[TMP0]], [[MSPROP_MUL_CST]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[MUL5]]
+; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i64 [[_MSPROP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP5]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
+; CHECK:       26:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       27:
+; CHECK-NEXT:    [[TMP28:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX6]])
+; CHECK-NEXT:    [[TMP29:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 0
+; CHECK-NEXT:    [[TMP30:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 1
+; CHECK-NEXT:    [[TMP31:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 2
+; CHECK-NEXT:    [[TMP32:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 3
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP6]], <vscale x 4 x float> [[TMP13]], <vscale x 4 x float> [[TMP22]], <vscale x 4 x float> [[TMP29]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP7]], <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[TMP23]], <vscale x 4 x float> [[TMP30]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP8]], <vscale x 4 x float> [[TMP15]], <vscale x 4 x float> [[TMP24]], <vscale x 4 x float> [[TMP31]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP9]], <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[TMP25]], <vscale x 4 x float> [[TMP32]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
   %1 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %ptr)
@@ -243,6 +653,27 @@ entry:
 }
 
 define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE:%.*]], <vscale x 2 x double> [[ZN0:%.*]], <vscale x 2 x double> [[ZN1:%.*]], <vscale x 2 x double> [[ZN2:%.*]], <vscale x 2 x double> [[ZN3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_7]], <vscale x 2 x double> [[ZN0]], <vscale x 2 x double> [[ZN1]], <vscale x 2 x double> [[ZN2]], <vscale x 2 x double> [[ZN3]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice,
   <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
   <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3)
@@ -255,6 +686,12 @@ define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn
 
 
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s8(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN1:%.*]], <vscale x 16 x i8> [[ZDN2:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[RES]]
+;
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> }
   @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2,
   <vscale x 16 x i8> %zm)
@@ -262,6 +699,12 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<v
 }
 
 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s16(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> [[ZDN1:%.*]], <vscale x 8 x i16> [[ZDN2:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[RES]]
+;
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> }
   @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2,
   <vscale x 8 x i16> %zm)
@@ -269,6 +712,12 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<
 }
 
 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s32(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> [[ZDN1:%.*]], <vscale x 4 x i32> [[ZDN2:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[RES]]
+;
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> }
   @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2,
   <vscale x 4 x i32> %zm)
@@ -276,6 +725,12 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<
 }
 
 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s64(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> [[ZDN1:%.*]], <vscale x 2 x i64> [[ZDN2:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[RES]]
+;
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> }
   @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2,
   <vscale x 2 x i64> %zm)
@@ -284,6 +739,12 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<
 
 
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8>%zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x4_s8(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> [[ZDN1:%.*]], <vscale x 16 x i8> [[ZDN2:%.*]], <vscale x 16 x i8> [[ZDN3:%.*]], <vscale x 16 x i8> [[ZDN4:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[RES]]
+;
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
   @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2,
   <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -292,6 +753,12 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 }
 
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_x4_single_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_x4_single_s16(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> [[ZDN1:%.*]], <vscale x 8 x i16> [[ZDN2:%.*]], <vscale x 8 x i16> [[ZDN3:%.*]], <vscale x 8 x i16> [[ZDN4:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[RES]]
+;
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
   @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2,
   <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -300,6 +767,12 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 }
 
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_x4_single_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_x4_single_s32(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> [[ZDN1:%.*]], <vscale x 4 x i32> [[ZDN2:%.*]], <vscale x 4 x i32> [[ZDN3:%.*]], <vscale x 4 x i32> [[ZDN4:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[RES]]
+;
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
   @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2,
   <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -308,6 +781,12 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 }
 
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_x4_single_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_x4_single_s64(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> [[ZDN1:%.*]], <vscale x 2 x i64> [[ZDN2:%.*]], <vscale x 2 x i64> [[ZDN3:%.*]], <vscale x 2 x i64> [[ZDN4:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[RES]]
+;
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
   @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2,
   <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt
index 848ccba696e76..af503d9b82843 100644
--- a/llvm/unittests/ADT/CMakeLists.txt
+++ b/llvm/unittests/ADT/CMakeLists.txt
@@ -63,6 +63,7 @@ add_llvm_unittest(ADTTests
   PointerUnionTest.cpp
   PostOrderIteratorTest.cpp
   PriorityWorklistTest.cpp
+  RadixTreeTest.cpp
   RangeAdapterTest.cpp
   RewriteBufferTest.cpp
   SCCIteratorTest.cpp
diff --git a/llvm/unittests/ADT/RadixTreeTest.cpp b/llvm/unittests/ADT/RadixTreeTest.cpp
new file mode 100644
index 0000000000000..b2dd67c13a713
--- /dev/null
+++ b/llvm/unittests/ADT/RadixTreeTest.cpp
@@ -0,0 +1,379 @@
+//===- llvm/unittest/ADT/RadixTreeTest.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/RadixTree.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <iterator>
+#include <list>
+#include <vector>
+
+using namespace llvm;
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+
+// Test with StringRef.
+
+TEST(RadixTreeTest, Empty) {
+  RadixTree<StringRef, int> T;
+  EXPECT_TRUE(T.empty());
+  EXPECT_EQ(T.size(), 0u);
+
+  EXPECT_TRUE(T.find_prefixes("").empty());
+  EXPECT_TRUE(T.find_prefixes("A").empty());
+
+  EXPECT_EQ(T.countNodes(), 1u);
+}
+
+TEST(RadixTreeTest, InsertEmpty) {
+  RadixTree<StringRef, int> T;
+  auto [It, IsNew] = T.emplace("", 4);
+  EXPECT_TRUE(!T.empty());
+  EXPECT_EQ(T.size(), 1u);
+  EXPECT_TRUE(IsNew);
+  const auto &[K, V] = *It;
+  EXPECT_TRUE(K.empty());
+  EXPECT_EQ(4, V);
+
+  EXPECT_THAT(T, ElementsAre(Pair("", 4)));
+
+  EXPECT_THAT(T.find_prefixes(""), ElementsAre(Pair("", 4)));
+
+  EXPECT_THAT(T.find_prefixes("a"), ElementsAre(Pair("", 4)));
+
+  EXPECT_EQ(T.countNodes(), 1u);
+}
+
+TEST(RadixTreeTest, Complex) {
+  RadixTree<StringRef, int> T;
+  T.emplace("abcd", 1);
+  EXPECT_EQ(T.countNodes(), 2u);
+  T.emplace("abklm", 2);
+  EXPECT_EQ(T.countNodes(), 4u);
+  T.emplace("123abklm", 3);
+  EXPECT_EQ(T.countNodes(), 5u);
+  T.emplace("123abklm", 4);
+  EXPECT_EQ(T.countNodes(), 5u);
+  T.emplace("ab", 5);
+  EXPECT_EQ(T.countNodes(), 5u);
+  T.emplace("1234567", 6);
+  EXPECT_EQ(T.countNodes(), 7u);
+  T.emplace("123456", 7);
+  EXPECT_EQ(T.countNodes(), 8u);
+  T.emplace("123456789", 8);
+  EXPECT_EQ(T.countNodes(), 9u);
+
+  EXPECT_THAT(T, UnorderedElementsAre(Pair("abcd", 1), Pair("abklm", 2),
+                                      Pair("123abklm", 3), Pair("ab", 5),
+                                      Pair("1234567", 6), Pair("123456", 7),
+                                      Pair("123456789", 8)));
+
+  EXPECT_THAT(T.find_prefixes("1234567890"),
+              UnorderedElementsAre(Pair("1234567", 6), Pair("123456", 7),
+                                   Pair("123456789", 8)));
+
+  EXPECT_THAT(T.find_prefixes("123abklm"),
+              UnorderedElementsAre(Pair("123abklm", 3)));
+
+  EXPECT_THAT(T.find_prefixes("abcdefg"),
+              UnorderedElementsAre(Pair("abcd", 1), Pair("ab", 5)));
+
+  EXPECT_EQ(T.countNodes(), 9u);
+}
+
+TEST(RadixTreeTest, ValueWith2Parameters) {
+  RadixTree<StringRef, std::pair<std::string, int>> T;
+  T.emplace("abcd", "a", 3);
+
+  EXPECT_THAT(T, UnorderedElementsAre(Pair("abcd", Pair("a", 3))));
+}
+
+// Test different types, less readable.
+
+template <typename T> struct TestData {
+  static const T Data1[];
+  static const T Data2[];
+};
+
+template <> const char TestData<char>::Data1[] = "abcdedcba";
+template <> const char TestData<char>::Data2[] = "abCDEDCba";
+
+template <> const int TestData<int>::Data1[] = {1, 2, 3, 4, 5, 4, 3, 2, 1};
+template <> const int TestData<int>::Data2[] = {1, 2, 4, 8, 16, 8, 4, 2, 1};
+
+template <typename T> class RadixTreeTypeTest : public ::testing::Test {
+public:
+  using IteratorType = decltype(adl_begin(std::declval<const T &>()));
+  using CharType = remove_cvref_t<decltype(*adl_begin(std::declval<T &>()))>;
+
+  T make(const CharType *Data, size_t N) { return T(StringRef(Data, N)); }
+
+  T make1(size_t N) { return make(TestData<CharType>::Data1, N); }
+  T make2(size_t N) { return make(TestData<CharType>::Data2, N); }
+};
+
+template <>
+iterator_range<StringRef::const_iterator>
+RadixTreeTypeTest<iterator_range<StringRef::const_iterator>>::make(
+    const char *Data, size_t N) {
+  return StringRef(Data).take_front(N);
+}
+
+template <>
+iterator_range<StringRef::const_reverse_iterator>
+RadixTreeTypeTest<iterator_range<StringRef::const_reverse_iterator>>::make(
+    const char *Data, size_t N) {
+  return reverse(StringRef(Data).take_back(N));
+}
+
+template <>
+ArrayRef<int> RadixTreeTypeTest<ArrayRef<int>>::make(const int *Data,
+                                                     size_t N) {
+  return ArrayRef<int>(Data, Data + N);
+}
+
+template <>
+std::vector<int> RadixTreeTypeTest<std::vector<int>>::make(const int *Data,
+                                                           size_t N) {
+  return std::vector<int>(Data, Data + N);
+}
+
+template <>
+std::list<int> RadixTreeTypeTest<std::list<int>>::make(const int *Data,
+                                                       size_t N) {
+  return std::list<int>(Data, Data + N);
+}
+
+class TypeNameGenerator {
+public:
+  template <typename T> static std::string GetName(int) {
+    if (std::is_same_v<T, StringRef>)
+      return "StringRef";
+    if (std::is_same_v<T, std::string>)
+      return "string";
+    if (std::is_same_v<T, iterator_range<StringRef::const_iterator>>)
+      return "iterator_range";
+    if (std::is_same_v<T, iterator_range<StringRef::const_reverse_iterator>>)
+      return "reverse_iterator_range";
+    if (std::is_same_v<T, ArrayRef<int>>)
+      return "ArrayRef";
+    if (std::is_same_v<T, std::vector<int>>)
+      return "vector";
+    if (std::is_same_v<T, std::list<int>>)
+      return "list";
+    return "Unknown";
+  }
+};
+
+using TestTypes =
+    ::testing::Types<StringRef, std::string,
+                     iterator_range<StringRef::const_iterator>,
+                     iterator_range<StringRef::const_reverse_iterator>,
+                     ArrayRef<int>, std::vector<int>, std::list<int>>;
+
+TYPED_TEST_SUITE(RadixTreeTypeTest, TestTypes, TypeNameGenerator);
+
+TYPED_TEST(RadixTreeTypeTest, Helpers) {
+  for (size_t i = 0; i < 9; ++i) {
+    auto R1 = this->make1(i);
+    auto R2 = this->make2(i);
+    EXPECT_EQ(llvm::range_size(R1), i);
+    EXPECT_EQ(llvm::range_size(R2), i);
+    auto [I1, I2] = llvm::mismatch(R1, R2);
+    // Exactly 2 first elements of Data1 and Data2 must match.
+    EXPECT_EQ(std::distance(R1.begin(), I1), std::min<int>(2, i));
+  }
+}
+
+TYPED_TEST(RadixTreeTypeTest, Empty) {
+  RadixTree<TypeParam, int> T;
+  EXPECT_TRUE(T.empty());
+  EXPECT_EQ(T.size(), 0u);
+
+  EXPECT_TRUE(T.find_prefixes(this->make1(0)).empty());
+  EXPECT_TRUE(T.find_prefixes(this->make2(1)).empty());
+
+  EXPECT_EQ(T.countNodes(), 1u);
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertEmpty) {
+  using TreeType = RadixTree<TypeParam, int>;
+  TreeType T;
+  auto [It, IsNew] = T.emplace(this->make1(0), 5);
+  EXPECT_TRUE(!T.empty());
+  EXPECT_EQ(T.size(), 1u);
+  EXPECT_TRUE(IsNew);
+  const auto &[K, V] = *It;
+  EXPECT_TRUE(K.empty());
+  EXPECT_EQ(5, V);
+
+  EXPECT_THAT(T.find_prefixes(this->make1(0)),
+              ElementsAre(Pair(ElementsAre(), 5)));
+
+  EXPECT_THAT(T.find_prefixes(this->make2(1)),
+              ElementsAre(Pair(ElementsAre(), 5)));
+
+  EXPECT_THAT(T, ElementsAre(Pair(ElementsAre(), 5)));
+
+  EXPECT_EQ(T.countNodes(), 1u);
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertEmptyTwice) {
+  using TreeType = RadixTree<TypeParam, int>;
+  TreeType T;
+  T.emplace(this->make1(0), 5);
+  auto [It, IsNew] = T.emplace(this->make1(0), 6);
+  EXPECT_TRUE(!T.empty());
+  EXPECT_EQ(T.size(), 1u);
+  EXPECT_TRUE(!IsNew);
+  const auto &[K, V] = *It;
+  EXPECT_TRUE(K.empty());
+  EXPECT_EQ(5, V);
+
+  EXPECT_THAT(T.find_prefixes(this->make1(0)),
+              ElementsAre(Pair(ElementsAre(), 5)));
+
+  EXPECT_THAT(T.find_prefixes(this->make2(1)),
+              ElementsAre(Pair(ElementsAre(), 5)));
+
+  EXPECT_THAT(T, ElementsAre(Pair(ElementsAre(), 5)));
+
+  EXPECT_EQ(T.countNodes(), 1u);
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertOne) {
+  using TreeType = RadixTree<TypeParam, int>;
+  TreeType T;
+  auto [It, IsNew] = T.emplace(this->make1(1), 4);
+  EXPECT_TRUE(!T.empty());
+  EXPECT_EQ(T.size(), 1u);
+  EXPECT_TRUE(IsNew);
+  const auto &[K, V] = *It;
+  EXPECT_THAT(K, ElementsAreArray(this->make1(1)));
+  EXPECT_EQ(4, V);
+
+  EXPECT_THAT(T, ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4)));
+
+  EXPECT_THAT(T.find_prefixes(this->make1(1)),
+              ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4)));
+
+  EXPECT_THAT(T.find_prefixes(this->make1(2)),
+              ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4)));
+
+  EXPECT_EQ(T.countNodes(), 2u);
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertOneTwice) {
+  using TreeType = RadixTree<TypeParam, int>;
+  TreeType T;
+  T.emplace(this->make1(1), 4);
+  auto [It, IsNew] = T.emplace(this->make1(1), 4);
+  EXPECT_TRUE(!T.empty());
+  EXPECT_EQ(T.size(), 1u);
+  EXPECT_TRUE(!IsNew);
+
+  EXPECT_THAT(T, ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4)));
+  EXPECT_EQ(T.countNodes(), 2u);
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertSuperStrings) {
+  using TreeType = RadixTree<TypeParam, int>;
+  TreeType T;
+
+  for (size_t Len = 0; Len < 7; Len += 2) {
+    auto [It, IsNew] = T.emplace(this->make1(Len), Len);
+    EXPECT_TRUE(IsNew);
+  }
+
+  EXPECT_THAT(T,
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0),
+                                   Pair(ElementsAreArray(this->make1(2)), 2),
+                                   Pair(ElementsAreArray(this->make1(4)), 4),
+                                   Pair(ElementsAreArray(this->make1(6)), 6)));
+
+  EXPECT_THAT(T.find_prefixes(this->make1(0)),
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0)));
+
+  EXPECT_THAT(T.find_prefixes(this->make1(3)),
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0),
+                                   Pair(ElementsAreArray(this->make1(2)), 2)));
+
+  EXPECT_THAT(T.find_prefixes(this->make1(7)),
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0),
+                                   Pair(ElementsAreArray(this->make1(2)), 2),
+                                   Pair(ElementsAreArray(this->make1(4)), 4),
+                                   Pair(ElementsAreArray(this->make1(6)), 6)));
+
+  EXPECT_EQ(T.countNodes(), 4u);
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertSubStrings) {
+  using TreeType = RadixTree<TypeParam, int>;
+  TreeType T;
+
+  for (size_t Len = 0; Len < 7; Len += 2) {
+    auto [It, IsNew] = T.emplace(this->make1(7 - Len), 7 - Len);
+    EXPECT_TRUE(IsNew);
+  }
+
+  EXPECT_THAT(T,
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make1(1)), 1),
+                                   Pair(ElementsAreArray(this->make1(3)), 3),
+                                   Pair(ElementsAreArray(this->make1(5)), 5),
+                                   Pair(ElementsAreArray(this->make1(7)), 7)));
+
+  EXPECT_THAT(T.find_prefixes(this->make1(0)), UnorderedElementsAre());
+
+  EXPECT_THAT(T.find_prefixes(this->make1(3)),
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make1(1)), 1),
+                                   Pair(ElementsAreArray(this->make1(3)), 3)));
+
+  EXPECT_THAT(T.find_prefixes(this->make1(6)),
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make1(1)), 1),
+                                   Pair(ElementsAreArray(this->make1(3)), 3),
+                                   Pair(ElementsAreArray(this->make1(5)), 5)));
+
+  EXPECT_EQ(T.countNodes(), 5u);
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertVShape) {
+  using TreeType = RadixTree<TypeParam, int>;
+  TreeType T;
+
+  EXPECT_EQ(T.countNodes(), 1u);
+  T.emplace(this->make1(5), 15);
+  EXPECT_EQ(T.countNodes(), 2u);
+  T.emplace(this->make2(6), 26);
+  EXPECT_EQ(T.countNodes(), 4u);
+  T.emplace(this->make2(1), 21);
+  EXPECT_EQ(T.countNodes(), 5u);
+
+  EXPECT_THAT(T,
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make1(5)), 15),
+                                   Pair(ElementsAreArray(this->make2(6)), 26),
+                                   Pair(ElementsAreArray(this->make2(1)), 21)));
+
+  EXPECT_THAT(T.find_prefixes(this->make1(7)),
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make2(1)), 21),
+                                   Pair(ElementsAreArray(this->make1(5)), 15)));
+
+  EXPECT_THAT(T.find_prefixes(this->make2(7)),
+              UnorderedElementsAre(Pair(ElementsAreArray(this->make2(1)), 21),
+                                   Pair(ElementsAreArray(this->make2(6)), 26)));
+
+  EXPECT_EQ(T.countNodes(), 5u);
+}
+
+} // namespace
diff --git a/llvm/unittests/ADT/STLForwardCompatTest.cpp b/llvm/unittests/ADT/STLForwardCompatTest.cpp
index 2a97e8d6a552f..c6ae6e36cfbff 100644
--- a/llvm/unittests/ADT/STLForwardCompatTest.cpp
+++ b/llvm/unittests/ADT/STLForwardCompatTest.cpp
@@ -185,7 +185,7 @@ TEST(TransformTest, ToUnderlying) {
 }
 
 TEST(STLForwardCompatTest, IdentityCxx20) {
-  llvm::identity_cxx20 identity;
+  llvm::identity identity;
 
   // Test with an lvalue.
   int X = 42;
diff --git a/llvm/unittests/DebugInfo/LogicalView/CompareElementsTest.cpp b/llvm/unittests/DebugInfo/LogicalView/CompareElementsTest.cpp
index e9c1fbae4e0a7..d3bf26ba14ff0 100644
--- a/llvm/unittests/DebugInfo/LogicalView/CompareElementsTest.cpp
+++ b/llvm/unittests/DebugInfo/LogicalView/CompareElementsTest.cpp
@@ -75,8 +75,8 @@ class ReaderTestCompare : public LVReader {
     setInstance(this);
   }
 
-  Error createScopes() { return LVReader::createScopes(); }
-  Error printScopes() { return LVReader::printScopes(); }
+  Error createScopes() override { return LVReader::createScopes(); }
+  Error printScopes() override { return LVReader::printScopes(); }
 
   void createElements();
   void addElements(bool IsReference, bool IsTarget);
diff --git a/llvm/unittests/DebugInfo/LogicalView/LocationRangesTest.cpp b/llvm/unittests/DebugInfo/LogicalView/LocationRangesTest.cpp
index 86949714afae4..7cd681369bc85 100644
--- a/llvm/unittests/DebugInfo/LogicalView/LocationRangesTest.cpp
+++ b/llvm/unittests/DebugInfo/LogicalView/LocationRangesTest.cpp
@@ -34,7 +34,7 @@ class ReaderTest : public LVReader {
 public:
   ReaderTest(ScopedPrinter &W) : LVReader("", "", W) { setInstance(this); }
 
-  Error createScopes() { return LVReader::createScopes(); }
+  Error createScopes() override { return LVReader::createScopes(); }
 };
 
 // Helper function to add a logical element to a given scope.
diff --git a/llvm/unittests/DebugInfo/LogicalView/LogicalElementsTest.cpp b/llvm/unittests/DebugInfo/LogicalView/LogicalElementsTest.cpp
index 8aa856ad30397..866739f09cf3f 100644
--- a/llvm/unittests/DebugInfo/LogicalView/LogicalElementsTest.cpp
+++ b/llvm/unittests/DebugInfo/LogicalView/LogicalElementsTest.cpp
@@ -72,8 +72,8 @@ class ReaderTestElements : public LVReader {
     setInstance(this);
   }
 
-  Error createScopes() { return LVReader::createScopes(); }
-  Error printScopes() { return LVReader::printScopes(); }
+  Error createScopes() override { return LVReader::createScopes(); }
+  Error printScopes() override { return LVReader::printScopes(); }
 
   void createElements();
   void addElements();
diff --git a/llvm/unittests/DebugInfo/LogicalView/SelectElementsTest.cpp b/llvm/unittests/DebugInfo/LogicalView/SelectElementsTest.cpp
index 70835ce05507c..2653347e76a41 100644
--- a/llvm/unittests/DebugInfo/LogicalView/SelectElementsTest.cpp
+++ b/llvm/unittests/DebugInfo/LogicalView/SelectElementsTest.cpp
@@ -60,7 +60,7 @@ class ReaderTestSelection : public LVReader {
     setInstance(this);
   }
 
-  Error createScopes() { return LVReader::createScopes(); }
+  Error createScopes() override { return LVReader::createScopes(); }
 
   void createElements();
   void addElements();
diff --git a/llvm/unittests/DebugInfo/LogicalView/WarningInternalTest.cpp b/llvm/unittests/DebugInfo/LogicalView/WarningInternalTest.cpp
index 36c6e16a78dfa..011321b92427c 100644
--- a/llvm/unittests/DebugInfo/LogicalView/WarningInternalTest.cpp
+++ b/llvm/unittests/DebugInfo/LogicalView/WarningInternalTest.cpp
@@ -117,7 +117,7 @@ class ReaderTestWarningInternal : public LVReader {
     setInstance(this);
   }
 
-  Error createScopes() { return LVReader::createScopes(); }
+  Error createScopes() override { return LVReader::createScopes(); }
 
   void setMapping();
   void createElements();