From 29cfef188088cb0101b3ec70b13d68c06a2d49d6 Mon Sep 17 00:00:00 2001
From: Mirko <mirkomueller97@live.de>
Date: Mon, 24 Nov 2025 16:53:09 +0100
Subject: [PATCH 01/37] [PDB][NativeSession] Use better error code for invalid
 format (#167885)

Replaces the default "Success" std::error_code with a more meaningful
one if `Magic != file_magic::pdb`.
---
 .../DebugInfo/PDB/Native/NativeSession.cpp    | 25 +++++++++++++------
 .../DebugInfo/PDB/NativeSessionTest.cpp       | 12 +++++++++
 2 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 8967a2eb1749e..49674b4c32de0 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -87,6 +87,19 @@ Error NativeSession::createFromPdb(std::unique_ptr<MemoryBuffer> Buffer,
   return Error::success();
 }
 
+static Error validatePdbMagic(StringRef PdbPath) {
+  file_magic Magic;
+  if (auto EC = identify_magic(PdbPath, Magic))
+    return make_error<RawError>(EC);
+
+  if (Magic != file_magic::pdb)
+    return make_error<RawError>(
+        raw_error_code::invalid_format,
+        "The input file did not contain the pdb file magic.");
+
+  return Error::success();
+}
+
 static Expected<std::unique_ptr<PDBFile>>
 loadPdbFile(StringRef PdbPath, std::unique_ptr<BumpPtrAllocator> &Allocator) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer =
@@ -97,10 +110,8 @@ loadPdbFile(StringRef PdbPath, std::unique_ptr<BumpPtrAllocator> &Allocator) {
   std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(*ErrorOrBuffer);
 
   PdbPath = Buffer->getBufferIdentifier();
-  file_magic Magic;
-  auto EC = identify_magic(PdbPath, Magic);
-  if (EC || Magic != file_magic::pdb)
-    return make_error<RawError>(EC);
+  if (auto EC = validatePdbMagic(PdbPath))
+    return std::move(EC);
 
   auto Stream = std::make_unique<MemoryBufferByteStream>(
       std::move(Buffer), llvm::endianness::little);
@@ -152,10 +163,8 @@ Error NativeSession::createFromExe(StringRef ExePath,
   if (!PdbPath)
     return PdbPath.takeError();
 
-  file_magic Magic;
-  auto EC = identify_magic(PdbPath.get(), Magic);
-  if (EC || Magic != file_magic::pdb)
-    return make_error<RawError>(EC);
+  if (auto EC = validatePdbMagic(PdbPath.get()))
+    return EC;
 
   auto Allocator = std::make_unique<BumpPtrAllocator>();
   auto File = loadPdbFile(PdbPath.get(), Allocator);
diff --git a/llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp b/llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp
index cffaf7c9543fb..20ae253513f05 100644
--- a/llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp
+++ b/llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp
@@ -40,6 +40,18 @@ TEST(NativeSessionTest, TestCreateFromExe) {
   ASSERT_THAT_ERROR(std::move(E), Succeeded());
 }
 
+TEST(NativeSessionTest, TestInvalidPdbMagicError) {
+  SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0);
+  llvm::sys::path::append(InputsDir, "SimpleTest.cpp");
+  std::string CppPath{InputsDir};
+  std::unique_ptr<IPDBSession> S;
+
+  Error E = NativeSession::createFromPdbPath(CppPath, S);
+  const char *FormatErr = "The record is in an unexpected format. "
+                          "The input file did not contain the pdb file magic.";
+  ASSERT_THAT_ERROR(std::move(E), FailedWithMessage(FormatErr));
+}
+
 TEST(NativeSessionTest, TestSetLoadAddress) {
   std::unique_ptr<IPDBSession> S;
   Error E = pdb::loadDataForEXE(PDB_ReaderType::Native, getExePath(), S);

From 2bdd1357c826afe681ab0d6ddfa8fb814b2cef6a Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Mon, 24 Nov 2025 16:53:58 +0100
Subject: [PATCH 02/37] [libc++] Optimize num_get integral functions (#121795)

```
---------------------------------------------------
Benchmark                            old        new
---------------------------------------------------
BM_num_get<bool>                 86.5 ns    32.3 ns
BM_num_get<long>                 82.1 ns    30.3 ns
BM_num_get<long long>            85.2 ns    33.4 ns
BM_num_get<unsigned short>       85.3 ns    31.2 ns
BM_num_get<unsigned int>         84.2 ns    31.1 ns
BM_num_get<unsigned long>        83.6 ns    31.9 ns
BM_num_get<unsigned long long>   87.7 ns    31.5 ns
BM_num_get<float>                 116 ns     114 ns
BM_num_get<double>                114 ns     114 ns
BM_num_get<long double>           113 ns     114 ns
BM_num_get<void*>                 151 ns     144 ns
```

This patch applies multiple optimizations:
- Stages two and three of do_get are merged and a custom integer parser
  has been implemented. This avoids allocations, removes the need for
  strto{,u}ll and avoids __stage2_int_loop (avoiding extra writes to
  memory).
- std::find has been replaced with __atoms_offset, which uses vector
  instructions to look for a character.

Fixes #158100
Fixes #158102
---
 libcxx/docs/ReleaseNotes/22.rst               |   2 +
 libcxx/include/CMakeLists.txt                 |   1 -
 libcxx/include/__algorithm/simd_utils.h       |  26 ++
 libcxx/include/__locale_dir/locale_base_api.h |  19 -
 .../locale_base_api/bsd_locale_fallbacks.h    |  10 -
 .../__locale_dir/locale_base_api/ibm.h        |  11 -
 .../__locale_dir/locale_base_api/musl.h       |  31 --
 libcxx/include/__locale_dir/num.h             | 414 ++++++++----------
 .../include/__locale_dir/support/bsd_like.h   |  15 -
 libcxx/include/__locale_dir/support/fuchsia.h |   7 -
 libcxx/include/__locale_dir/support/linux.h   |  33 --
 .../__locale_dir/support/no_locale/strtonum.h |   9 -
 libcxx/include/__locale_dir/support/windows.h |  25 --
 .../__support/xlocale/__strtonum_fallback.h   |   8 -
 libcxx/include/module.modulemap.in            |   1 -
 libcxx/src/locale.cpp                         |  48 ++
 .../facet.num.get.members/get_long.pass.cpp   | 150 +++++++
 .../get_unsigned_int.pass.cpp                 |  11 +
 .../get_unsigned_long.pass.cpp                |  11 +
 .../get_unsigned_long_long.pass.cpp           |  11 +
 .../get_unsigned_short.pass.cpp               |  11 +
 21 files changed, 446 insertions(+), 408 deletions(-)
 delete mode 100644 libcxx/include/__locale_dir/locale_base_api/musl.h

diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index b8e0e9b5a1814..6ea323ced3003 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -83,6 +83,8 @@ Improvements and New Features
   iterators, resulting in a performance improvement for ``std::deque<short>`` and
   ``std::join_view<vector<vector<short>>>`` iterators.
 
+- The ``num_get::do_get`` integral overloads have been optimized, resulting in a performance improvement of up to 2.8x.
+
 Deprecations and Removals
 -------------------------
 
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 4b2713191c1c0..f289666ec12ab 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -520,7 +520,6 @@ set(files
   __locale_dir/locale_base_api.h
   __locale_dir/locale_base_api/bsd_locale_fallbacks.h
   __locale_dir/locale_base_api/ibm.h
-  __locale_dir/locale_base_api/musl.h
   __locale_dir/locale_base_api/openbsd.h
   __locale_dir/messages.h
   __locale_dir/money.h
diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h
index aaeb8a881df18..f73c9ea4b6ea7 100644
--- a/libcxx/include/__algorithm/simd_utils.h
+++ b/libcxx/include/__algorithm/simd_utils.h
@@ -114,6 +114,27 @@ template <class _VecT, class _Iter>
   }(make_index_sequence<__simd_vector_size_v<_VecT>>{});
 }
 
+// Load the first _Np elements, zero the rest
+_LIBCPP_DIAGNOSTIC_PUSH
+_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi")
+template <class _VecT, size_t _Np, class _Iter>
+[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __partial_load(_Iter __iter) noexcept {
+  return [=]<size_t... _LoadIndices, size_t... _ZeroIndices>(
+             index_sequence<_LoadIndices...>, index_sequence<_ZeroIndices...>) _LIBCPP_ALWAYS_INLINE noexcept {
+    return _VecT{__iter[_LoadIndices]..., ((void)_ZeroIndices, 0)...};
+  }(make_index_sequence<_Np>{}, make_index_sequence<__simd_vector_size_v<_VecT> - _Np>{});
+}
+
+// Create a vector where every element is __val
+template <class _VecT>
+[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT
+__broadcast(__simd_vector_underlying_type_t<_VecT> __val) {
+  return [&]<std::size_t... _Indices>(index_sequence<_Indices...>) {
+    return _VecT{((void)_Indices, __val)...};
+  }(make_index_sequence<__simd_vector_size_v<_VecT>>());
+}
+_LIBCPP_DIAGNOSTIC_POP
+
 template <class _Tp, size_t _Np>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __any_of(__simd_vector<_Tp, _Np> __vec) noexcept {
   return __builtin_reduce_or(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
@@ -124,6 +145,11 @@ template <class _Tp, size_t _Np>
   return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
 }
 
+template <class _Tp, size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __none_of(__simd_vector<_Tp, _Np> __vec) noexcept {
+  return !__builtin_reduce_or(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
+}
+
 template <class _Tp, size_t _Np>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
   using __mask_vec = __simd_vector<bool, _Np>;
diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h
index fef90bb77991f..d26d529d4e0c2 100644
--- a/libcxx/include/__locale_dir/locale_base_api.h
+++ b/libcxx/include/__locale_dir/locale_base_api.h
@@ -57,8 +57,6 @@
 //  float               __strtof(const char*, char**, __locale_t);
 //  double              __strtod(const char*, char**, __locale_t);
 //  long double         __strtold(const char*, char**, __locale_t);
-//  long long           __strtoll(const char*, char**, __locale_t);
-//  unsigned long long  __strtoull(const char*, char**, __locale_t);
 // }
 //
 // Character manipulation functions
@@ -104,7 +102,6 @@
 //
 //  int     __snprintf(char*, size_t, __locale_t, const char*, ...); // required by the headers
 //  int     __asprintf(char**, __locale_t, const char*, ...);        // required by the headers
-//  int     __sscanf(const char*, __locale_t, const char*, ...);     // required by the headers
 // }
 
 #if _LIBCPP_HAS_LOCALIZATION
@@ -131,8 +128,6 @@
 #      include <__locale_dir/locale_base_api/ibm.h>
 #    elif defined(__OpenBSD__)
 #      include <__locale_dir/locale_base_api/openbsd.h>
-#    elif defined(__wasi__) || _LIBCPP_HAS_MUSL_LIBC
-#      include <__locale_dir/locale_base_api/musl.h>
 #    endif
 
 #    include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h>
@@ -192,15 +187,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return strtold_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return strtoll_l(__nptr, __endptr, __base, __loc);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return strtoull_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
@@ -299,11 +285,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __
     char** __s, __locale_t __loc, const char* __format, _Args&&... __args) {
   return std::__libcpp_asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
 }
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
-  return std::__libcpp_sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
-}
 _LIBCPP_DIAGNOSTIC_POP
 #    undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
 
diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
index b62a1b737e97f..8cdbe0cd15051 100644
--- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
+++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
@@ -125,16 +125,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l(
   return __res;
 }
 
-inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l(
-    const char* __s, locale_t __l, const char* __format, ...) {
-  va_list __va;
-  va_start(__va, __format);
-  __locale_guard __current(__l);
-  int __res = vsscanf(__s, __format, __va);
-  va_end(__va);
-  return __res;
-}
-
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/ibm.h b/libcxx/include/__locale_dir/locale_base_api/ibm.h
index 1d1d15df9f799..47a83eac7df36 100644
--- a/libcxx/include/__locale_dir/locale_base_api/ibm.h
+++ b/libcxx/include/__locale_dir/locale_base_api/ibm.h
@@ -53,11 +53,6 @@ struct __setAndRestore {
 
 // The following are not POSIX routines.  These are quick-and-dirty hacks
 // to make things pretend to work
-inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t locale) {
-  __setAndRestore __newloc(locale);
-  return ::strtoll(__nptr, __endptr, __base);
-}
-
 inline _LIBCPP_HIDE_FROM_ABI double strtod_l(const char* __nptr, char** __endptr, locale_t locale) {
   __setAndRestore __newloc(locale);
   return ::strtod(__nptr, __endptr);
@@ -73,12 +68,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double strtold_l(const char* __nptr, char** __
   return ::strtold(__nptr, __endptr);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t locale) {
-  __setAndRestore __newloc(locale);
-  return ::strtoull(__nptr, __endptr, __base);
-}
-
 inline _LIBCPP_HIDE_FROM_ABI
 _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char* fmt, va_list ap) {
   const size_t buff_size = 256;
diff --git a/libcxx/include/__locale_dir/locale_base_api/musl.h b/libcxx/include/__locale_dir/locale_base_api/musl.h
deleted file mode 100644
index 1653214cdba1e..0000000000000
--- a/libcxx/include/__locale_dir/locale_base_api/musl.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// -*- C++ -*-
-//===-----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This adds support for the extended locale functions that are currently
-// missing from the Musl C library.
-//
-// This only works when the specified locale is "C" or "POSIX", but that's
-// about as good as we can do without implementing full xlocale support
-// in Musl.
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H
-#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H
-
-#include <cstdlib>
-#include <cwchar>
-
-inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoll(__nptr, __endptr, __base);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoull(__nptr, __endptr, __base);
-}
-
-#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H
diff --git a/libcxx/include/__locale_dir/num.h b/libcxx/include/__locale_dir/num.h
index 6eef9b3095d6e..98b8eb0c600f5 100644
--- a/libcxx/include/__locale_dir/num.h
+++ b/libcxx/include/__locale_dir/num.h
@@ -12,6 +12,7 @@
 #include <__algorithm/copy.h>
 #include <__algorithm/find.h>
 #include <__algorithm/reverse.h>
+#include <__algorithm/simd_utils.h>
 #include <__charconv/to_chars_integral.h>
 #include <__charconv/traits.h>
 #include <__config>
@@ -48,9 +49,9 @@ struct _LIBCPP_EXPORTED_FROM_ABI __num_get_base {
   static int __get_base(ios_base&);
   static const char __src[33]; // "0123456789abcdefABCDEFxX+-pPiInN"
   // count of leading characters in __src used for parsing integers ("012..X+-")
-  static const size_t __int_chr_cnt = 26;
+  static inline const size_t __int_chr_cnt = 26;
   // count of leading characters in __src used for parsing floating-point values ("012..-pP")
-  static const size_t __fp_chr_cnt = 28;
+  static inline const size_t __fp_chr_cnt = 28;
 };
 
 template <class _CharT>
@@ -73,7 +74,8 @@ struct __num_get : protected __num_get_base {
 
   [[__deprecated__("This exists only for ABI compatibility")]] static string
   __stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep);
-  static int __stage2_int_loop(
+
+  [[__deprecated__("This exists only for ABI compatibility")]] static int __stage2_int_loop(
       _CharT __ct,
       int __base,
       char* __a,
@@ -85,11 +87,24 @@ struct __num_get : protected __num_get_base {
       unsigned*& __g_end,
       _CharT* __atoms);
 
-  _LIBCPP_HIDE_FROM_ABI static string __stage2_int_prep(ios_base& __iob, _CharT& __thousands_sep) {
-    locale __loc                 = __iob.getloc();
-    const numpunct<_CharT>& __np = use_facet<numpunct<_CharT> >(__loc);
-    __thousands_sep              = __np.thousands_sep();
-    return __np.grouping();
+  _LIBCPP_HIDE_FROM_ABI static ptrdiff_t __atoms_offset(const _CharT* __atoms, _CharT __val) {
+    // TODO: Remove the manual vectorization once https://llvm.org/PR168551 is resolved
+#  if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS
+    if constexpr (is_same<_CharT, char>::value) {
+      // TODO(LLVM 24): This can be removed, since -Wpsabi doesn't warn on [[gnu::always_inline]] functions anymore.
+      _LIBCPP_DIAGNOSTIC_PUSH
+      _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi")
+      using __vec   = __simd_vector<char, 32>;
+      __vec __chars = std::__broadcast<__vec>(__val);
+      __vec __cmp   = std::__partial_load<__vec, __int_chr_cnt>(__atoms);
+      auto __res    = __chars == __cmp;
+      if (std::__none_of(__res))
+        return __int_chr_cnt;
+      return std::min(__int_chr_cnt, std::__find_first_set(__res));
+      _LIBCPP_DIAGNOSTIC_POP
+    }
+#  endif
+    return std::find(__atoms, __atoms + __int_chr_cnt, __val) - __atoms;
   }
 
   _LIBCPP_HIDE_FROM_ABI const _CharT* __do_widen(ios_base& __iob, _CharT* __atoms) const {
@@ -122,54 +137,6 @@ string __num_get<_CharT>::__stage2_float_prep(
   return __np.grouping();
 }
 
-template <class _CharT>
-int __num_get<_CharT>::__stage2_int_loop(
-    _CharT __ct,
-    int __base,
-    char* __a,
-    char*& __a_end,
-    unsigned& __dc,
-    _CharT __thousands_sep,
-    const string& __grouping,
-    unsigned* __g,
-    unsigned*& __g_end,
-    _CharT* __atoms) {
-  if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) {
-    *__a_end++ = __ct == __atoms[24] ? '+' : '-';
-    __dc       = 0;
-    return 0;
-  }
-  if (__grouping.size() != 0 && __ct == __thousands_sep) {
-    if (__g_end - __g < __num_get_buf_sz) {
-      *__g_end++ = __dc;
-      __dc       = 0;
-    }
-    return 0;
-  }
-  ptrdiff_t __f = std::find(__atoms, __atoms + __int_chr_cnt, __ct) - __atoms;
-  if (__f >= 24)
-    return -1;
-  switch (__base) {
-  case 8:
-  case 10:
-    if (__f >= __base)
-      return -1;
-    break;
-  case 16:
-    if (__f < 22)
-      break;
-    if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') {
-      __dc       = 0;
-      *__a_end++ = __src[__f];
-      return 0;
-    }
-    return -1;
-  }
-  *__a_end++ = __src[__f];
-  ++__dc;
-  return 0;
-}
-
 template <class _CharT>
 int __num_get<_CharT>::__stage2_float_loop(
     _CharT __ct,
@@ -274,65 +241,6 @@ _LIBCPP_HIDE_FROM_ABI _Tp __num_get_float(const char* __a, const char* __a_end,
   return 0;
 }
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp
-__num_get_signed_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) {
-  if (__a != __a_end) {
-    __libcpp_remove_reference_t<decltype(errno)> __save_errno = errno;
-    errno                                                     = 0;
-    char* __p2;
-    long long __ll = __locale::__strtoll(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE);
-    __libcpp_remove_reference_t<decltype(errno)> __current_errno = errno;
-    if (__current_errno == 0)
-      errno = __save_errno;
-    if (__p2 != __a_end) {
-      __err = ios_base::failbit;
-      return 0;
-    } else if (__current_errno == ERANGE || __ll < numeric_limits<_Tp>::min() || numeric_limits<_Tp>::max() < __ll) {
-      __err = ios_base::failbit;
-      if (__ll > 0)
-        return numeric_limits<_Tp>::max();
-      else
-        return numeric_limits<_Tp>::min();
-    }
-    return static_cast<_Tp>(__ll);
-  }
-  __err = ios_base::failbit;
-  return 0;
-}
-
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp
-__num_get_unsigned_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) {
-  if (__a != __a_end) {
-    const bool __negate = *__a == '-';
-    if (__negate && ++__a == __a_end) {
-      __err = ios_base::failbit;
-      return 0;
-    }
-    __libcpp_remove_reference_t<decltype(errno)> __save_errno = errno;
-    errno                                                     = 0;
-    char* __p2;
-    unsigned long long __ll = __locale::__strtoull(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE);
-    __libcpp_remove_reference_t<decltype(errno)> __current_errno = errno;
-    if (__current_errno == 0)
-      errno = __save_errno;
-    if (__p2 != __a_end) {
-      __err = ios_base::failbit;
-      return 0;
-    } else if (__current_errno == ERANGE || numeric_limits<_Tp>::max() < __ll) {
-      __err = ios_base::failbit;
-      return numeric_limits<_Tp>::max();
-    }
-    _Tp __res = static_cast<_Tp>(__ll);
-    if (__negate)
-      __res = -__res;
-    return __res;
-  }
-  __err = ios_base::failbit;
-  return 0;
-}
-
 template <class _CharT, class _InputIterator = istreambuf_iterator<_CharT> >
 class num_get : public locale::facet, private __num_get<_CharT> {
 public:
@@ -470,137 +378,194 @@ class num_get : public locale::facet, private __num_get<_CharT> {
     return __b;
   }
 
-  template <class _Signed>
-  _LIBCPP_HIDE_FROM_ABI iter_type
-  __do_get_signed(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Signed& __v) const {
+  template <class _MaybeSigned>
+  iter_type __do_get_integral(
+      iter_type __first, iter_type __last, ios_base& __iob, ios_base::iostate& __err, _MaybeSigned& __v) const {
+    using _Unsigned = __make_unsigned_t<_MaybeSigned>;
+
     // Stage 1
     int __base = this->__get_base(__iob);
-    // Stage 2
-    char_type __thousands_sep;
-    const int __atoms_size = __num_get_base::__int_chr_cnt;
-    char_type __atoms1[__atoms_size];
-    const char_type* __atoms = this->__do_widen(__iob, __atoms1);
-    string __grouping        = this->__stage2_int_prep(__iob, __thousands_sep);
-    string __buf;
-    __buf.resize(__buf.capacity());
-    char* __a     = &__buf[0];
-    char* __a_end = __a;
+
+    // Stages 2 & 3
+    // These are combined into a single step where we parse the characters and calculate the value in one go instead of
+    // storing the relevant characters first (in an allocated buffer) and parsing them afterwards.
+    // This makes the whole process significantly faster, since we avoid potential allocations and copies.
+
+    const auto& __numpunct    = use_facet<numpunct<_CharT> >(__iob.getloc());
+    char_type __thousands_sep = __numpunct.thousands_sep();
+    string __grouping         = __numpunct.grouping();
+
+    char_type __atoms_buffer[__num_get_base::__int_chr_cnt];
+    const char_type* __atoms = this->__do_widen(__iob, __atoms_buffer);
     unsigned __g[__num_get_base::__num_get_buf_sz];
     unsigned* __g_end = __g;
     unsigned __dc     = 0;
-    for (; __b != __e; ++__b) {
-      if (__a_end == __a + __buf.size()) {
-        size_t __tmp = __buf.size();
-        __buf.resize(2 * __buf.size());
-        __buf.resize(__buf.capacity());
-        __a     = &__buf[0];
-        __a_end = __a + __tmp;
+
+    if (__first == __last) {
+      __err |= ios_base::eofbit | ios_base::failbit;
+      __v = 0;
+      return __first;
+    }
+
+    while (!__grouping.empty() && *__first == __thousands_sep) {
+      ++__first;
+      if (__g_end - __g < this->__num_get_buf_sz)
+        *__g_end++ = 0;
+    }
+
+    bool __negate = false;
+    // __c == '+' || __c == '-'
+    if (auto __c = *__first; __c == __atoms[24] || __c == __atoms[25]) {
+      __negate = __c == __atoms[25];
+      ++__first;
+    }
+
+    if (__first == __last) {
+      __err |= ios_base::eofbit | ios_base::failbit;
+      __v = 0;
+      return __first;
+    }
+
+    bool __parsed_num = false;
+
+    // If we don't have a pre-set base, figure it out and swallow any prefix
+    if (__base == 0) {
+      auto __c = *__first;
+      // __c == '0'
+      if (__c == __atoms[0]) {
+        ++__first;
+        if (__first == __last) {
+          __err |= ios_base::eofbit;
+          return __first;
+        }
+        // __c2 == 'x' || __c2 == 'X'
+        if (auto __c2 = *__first; __c2 == __atoms[22] || __c2 == __atoms[23]) {
+          __base = 16;
+          ++__first;
+        } else {
+          __base = 8;
+        }
+      } else {
+        __base = 10;
+      }
+
+      // If the base has been specified explicitly, try to swallow the appropriate prefix. We only need to do something
+      // special for hex, since decimal has no prefix and octal's prefix is '0', which doesn't change the value that
+      // we'll parse if we don't swallow it.
+    } else if (__base == 16) {
+      // Try to swallow '0x'
+
+      // *__first == '0'
+      if (*__first == __atoms[0]) {
+        ++__first;
+        if (__first == __last) {
+          __err |= ios_base::eofbit;
+          __v = 0;
+          return __first;
+        }
+        // __c == 'x' || __c == 'X'
+        if (auto __c = *__first; __c == __atoms[22] || __c == __atoms[23])
+          ++__first;
+        else
+          __parsed_num = true; // We only swallowed '0', so we've started to parse a number
       }
-      if (this->__stage2_int_loop(
-              *__b,
-              __base,
-              __a,
-              __a_end,
-              __dc,
-              __thousands_sep,
-              __grouping,
-              __g,
-              __g_end,
-              const_cast<char_type*>(__atoms)))
-        break;
     }
-    if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz)
-      *__g_end++ = __dc;
-    // Stage 3
-    __v = std::__num_get_signed_integral<_Signed>(__a, __a_end, __err, __base);
-    // Digit grouping checked
-    __check_grouping(__grouping, __g, __g_end, __err);
-    // EOF checked
-    if (__b == __e)
-      __err |= ios_base::eofbit;
-    return __b;
-  }
 
-  template <class _Unsigned>
-  _LIBCPP_HIDE_FROM_ABI iter_type
-  __do_get_unsigned(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Unsigned& __v) const {
-    // Stage 1
-    int __base = this->__get_base(__iob);
-    // Stage 2
-    char_type __thousands_sep;
-    const int __atoms_size = __num_get_base::__int_chr_cnt;
-    char_type __atoms1[__atoms_size];
-    const char_type* __atoms = this->__do_widen(__iob, __atoms1);
-    string __grouping        = this->__stage2_int_prep(__iob, __thousands_sep);
-    string __buf;
-    __buf.resize(__buf.capacity());
-    char* __a     = &__buf[0];
-    char* __a_end = __a;
-    unsigned __g[__num_get_base::__num_get_buf_sz];
-    unsigned* __g_end = __g;
-    unsigned __dc     = 0;
-    for (; __b != __e; ++__b) {
-      if (__a_end == __a + __buf.size()) {
-        size_t __tmp = __buf.size();
-        __buf.resize(2 * __buf.size());
-        __buf.resize(__buf.capacity());
-        __a     = &__buf[0];
-        __a_end = __a + __tmp;
+    // Calculate the actual number
+    _Unsigned __val   = 0;
+    bool __overflowed = false;
+    for (; __first != __last; ++__first) {
+      auto __c = *__first;
+      if (!__grouping.empty() && __c == __thousands_sep) {
+        if (__g_end - __g < this->__num_get_buf_sz) {
+          *__g_end++ = __dc;
+          __dc       = 0;
+        }
+        continue;
       }
-      if (this->__stage2_int_loop(
-              *__b,
-              __base,
-              __a,
-              __a_end,
-              __dc,
-              __thousands_sep,
-              __grouping,
-              __g,
-              __g_end,
-              const_cast<char_type*>(__atoms)))
+      auto __offset = this->__atoms_offset(__atoms, __c);
+      if (__offset >= 22) // Not a valid integer character
+        break;
+
+      if (__base == 16 && __offset >= 16)
+        __offset -= 6;
+      if (__offset >= __base)
         break;
+      // __val = (__val * __base) + __offset
+      __overflowed |= __builtin_mul_overflow(__val, __base, std::addressof(__val)) ||
+                      __builtin_add_overflow(__val, __offset, std::addressof(__val));
+      __parsed_num = true;
+      ++__dc;
     }
+
+    if (!__parsed_num) {
+      __err |= ios_base::failbit;
+      __v = 0;
+    } else if (__overflowed) {
+      __err |= ios_base::failbit;
+      __v = is_signed<_MaybeSigned>::value && __negate
+              ? numeric_limits<_MaybeSigned>::min()
+              : numeric_limits<_MaybeSigned>::max();
+    } else if (!__negate) {
+      if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max())) {
+        __err |= ios_base::failbit;
+        __v = numeric_limits<_MaybeSigned>::max();
+      } else {
+        __v = __val;
+      }
+    } else if (is_signed<_MaybeSigned>::value) {
+      if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) {
+        __err |= ios_base::failbit;
+        __v = numeric_limits<_MaybeSigned>::min();
+      } else if (__val == static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) {
+        __v = numeric_limits<_MaybeSigned>::min();
+      } else {
+        __v = -__val;
+      }
+    } else {
+      __v = -__val;
+    }
+
     if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz)
       *__g_end++ = __dc;
-    // Stage 3
-    __v = std::__num_get_unsigned_integral<_Unsigned>(__a, __a_end, __err, __base);
+
     // Digit grouping checked
     __check_grouping(__grouping, __g, __g_end, __err);
     // EOF checked
-    if (__b == __e)
+    if (__first == __last)
       __err |= ios_base::eofbit;
-    return __b;
+    return __first;
   }
 
   virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, bool& __v) const;
 
   virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long& __v) const {
-    return this->__do_get_signed(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long long& __v) const {
-    return this->__do_get_signed(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned short& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned int& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long long& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, float& __v) const {
@@ -654,40 +619,13 @@ _InputIterator num_get<_CharT, _InputIterator>::do_get(
 template <class _CharT, class _InputIterator>
 _InputIterator num_get<_CharT, _InputIterator>::do_get(
     iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, void*& __v) const {
-  // Stage 1
-  int __base = 16;
-  // Stage 2
-  char_type __atoms[__num_get_base::__int_chr_cnt];
-  char_type __thousands_sep = char_type();
-  string __grouping;
-  std::use_facet<ctype<_CharT> >(__iob.getloc())
-      .widen(__num_get_base::__src, __num_get_base::__src + __num_get_base::__int_chr_cnt, __atoms);
-  string __buf;
-  __buf.resize(__buf.capacity());
-  char* __a     = &__buf[0];
-  char* __a_end = __a;
-  unsigned __g[__num_get_base::__num_get_buf_sz];
-  unsigned* __g_end = __g;
-  unsigned __dc     = 0;
-  for (; __b != __e; ++__b) {
-    if (__a_end == __a + __buf.size()) {
-      size_t __tmp = __buf.size();
-      __buf.resize(2 * __buf.size());
-      __buf.resize(__buf.capacity());
-      __a     = &__buf[0];
-      __a_end = __a + __tmp;
-    }
-    if (this->__stage2_int_loop(*__b, __base, __a, __a_end, __dc, __thousands_sep, __grouping, __g, __g_end, __atoms))
-      break;
-  }
-  // Stage 3
-  __buf.resize(__a_end - __a);
-  if (__locale::__sscanf(__buf.c_str(), _LIBCPP_GET_C_LOCALE, "%p", &__v) != 1)
-    __err = ios_base::failbit;
-  // EOF checked
-  if (__b == __e)
-    __err |= ios_base::eofbit;
-  return __b;
+  auto __flags = __iob.flags(); // remember the caller's format flags; they are put back below
+  __iob.flags((__flags & ~ios_base::basefield & ~ios_base::uppercase) | ios_base::hex); // hex only, for the parse
+  uintptr_t __ptr; // not initialized here; assumes __do_get_integral assigns it on every path -- TODO confirm
+  auto __res = __do_get_integral(__b, __e, __iob, __err, __ptr); // pointers are read through the integral path
+  __iob.flags(__flags); // restore the flags saved above
+  __v = reinterpret_cast<void*>(__ptr); // hand the parsed integer back as a pointer value
+  return __res;
 }
 
 extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get<char>;
diff --git a/libcxx/include/__locale_dir/support/bsd_like.h b/libcxx/include/__locale_dir/support/bsd_like.h
index 27735529d5524..6f533b4e1eab1 100644
--- a/libcxx/include/__locale_dir/support/bsd_like.h
+++ b/libcxx/include/__locale_dir/support/bsd_like.h
@@ -79,15 +79,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return ::strtold_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::strtoll_l(__nptr, __endptr, __base, __loc);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::strtoull_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
@@ -211,12 +202,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __
     char** __s, __locale_t __loc, const char* __format, _Args&&... __args) {
   return ::asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); // non-standard
 }
-
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
-  return ::sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
-}
 _LIBCPP_DIAGNOSTIC_POP
 #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
 
diff --git a/libcxx/include/__locale_dir/support/fuchsia.h b/libcxx/include/__locale_dir/support/fuchsia.h
index 4b9e63facb19e..528bfeb0cb6e1 100644
--- a/libcxx/include/__locale_dir/support/fuchsia.h
+++ b/libcxx/include/__locale_dir/support/fuchsia.h
@@ -141,13 +141,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __
   __locale_guard __current(__loc);
   return ::asprintf(__s, __format, std::forward<_Args>(__args)...); // non-standard
 }
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
-  __locale_guard __current(__loc);
-  return std::sscanf(__s, __format, std::forward<_Args>(__args)...);
-}
-
 _LIBCPP_DIAGNOSTIC_POP
 #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
 
diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h
index 94a2ecb9a940d..1a589be49bf1d 100644
--- a/libcxx/include/__locale_dir/support/linux.h
+++ b/libcxx/include/__locale_dir/support/linux.h
@@ -94,25 +94,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return ::strtold_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-#if !_LIBCPP_HAS_MUSL_LIBC
-  return ::strtoll_l(__nptr, __endptr, __base, __loc);
-#else
-  (void)__loc;
-  return ::strtoll(__nptr, __endptr, __base);
-#endif
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-#if !_LIBCPP_HAS_MUSL_LIBC
-  return ::strtoull_l(__nptr, __endptr, __base, __loc);
-#else
-  (void)__loc;
-  return ::strtoull(__nptr, __endptr, __base);
-#endif
-}
-
 //
 // Character manipulation functions
 //
@@ -257,20 +238,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(
   va_end(__va);
   return __res;
 }
-
-#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs
-_LIBCPP_HIDE_FROM_ABI
-#endif
-inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, ...) {
-  va_list __va;
-  va_start(__va, __format);
-  __locale_guard __current(__loc);
-  int __res = std::vsscanf(__s, __format, __va);
-  va_end(__va);
-  return __res;
-}
-
 } // namespace __locale
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/include/__locale_dir/support/no_locale/strtonum.h b/libcxx/include/__locale_dir/support/no_locale/strtonum.h
index 0e7a32993e736..59544e10e4a4c 100644
--- a/libcxx/include/__locale_dir/support/no_locale/strtonum.h
+++ b/libcxx/include/__locale_dir/support/no_locale/strtonum.h
@@ -34,15 +34,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return std::strtold(__nptr, __endptr);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t) {
-  return std::strtoll(__nptr, __endptr, __base);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t) {
-  return std::strtoull(__nptr, __endptr, __base);
-}
-
 } // namespace __locale
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h
index edd8a66c23e80..644ef68adf545 100644
--- a/libcxx/include/__locale_dir/support/windows.h
+++ b/libcxx/include/__locale_dir/support/windows.h
@@ -186,14 +186,6 @@ inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr
   return ::_strtod_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::_strtoi64_l(__nptr, __endptr, __base, __loc);
-}
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::_strtoui64_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
@@ -276,23 +268,6 @@ _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snpri
 _LIBCPP_EXPORTED_FROM_ABI
 _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(char** __ret, __locale_t __loc, const char* __format, ...);
 
-_LIBCPP_DIAGNOSTIC_PUSH
-_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat")
-_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates
-#ifdef _LIBCPP_COMPILER_CLANG_BASED
-#  define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__)
-#else
-#  define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */
-#endif
-
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __dest, __locale_t __loc, const char* __format, _Args&&... __args) {
-  return ::_sscanf_l(__dest, __format, __loc, std::forward<_Args>(__args)...);
-}
-_LIBCPP_DIAGNOSTIC_POP
-#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
-
 #if defined(_LIBCPP_BUILDING_LIBRARY)
 struct __locale_guard {
   _LIBCPP_HIDE_FROM_ABI __locale_guard(__locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) {
diff --git a/libcxx/include/__support/xlocale/__strtonum_fallback.h b/libcxx/include/__support/xlocale/__strtonum_fallback.h
index 5275aead35af9..90bd59d36c248 100644
--- a/libcxx/include/__support/xlocale/__strtonum_fallback.h
+++ b/libcxx/include/__support/xlocale/__strtonum_fallback.h
@@ -34,12 +34,4 @@ inline _LIBCPP_HIDE_FROM_ABI long double strtold_l(const char* __nptr, char** __
   return ::strtold(__nptr, __endptr);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoll(__nptr, __endptr, __base);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoull(__nptr, __endptr, __base);
-}
-
 #endif // _LIBCPP___SUPPORT_XLOCALE_STRTONUM_FALLBACK_H
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 57d66cd1ccaef..492708792cbbf 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -1591,7 +1591,6 @@ module std [system] {
     module locale_base_api {
       textual header "__locale_dir/locale_base_api/bsd_locale_fallbacks.h"
       textual header "__locale_dir/locale_base_api/ibm.h"
-      textual header "__locale_dir/locale_base_api/musl.h"
       textual header "__locale_dir/locale_base_api/openbsd.h"
     }
     export *
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 0f695d4f1a229..2081e75fdf64b 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -5557,6 +5557,54 @@ string __num_get<_CharT>::__stage2_int_prep(ios_base& __iob, _CharT* __atoms, _C
   return __np.grouping();
 }
 
+template <class _CharT>
+int __num_get<_CharT>::__stage2_int_loop(
+    _CharT __ct,
+    int __base,
+    char* __a,
+    char*& __a_end,
+    unsigned& __dc,
+    _CharT __thousands_sep,
+    const string& __grouping,
+    unsigned* __g,
+    unsigned*& __g_end,
+    _CharT* __atoms) {
+  if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) { // sign, accepted only as first buffered char
+    *__a_end++ = __ct == __atoms[24] ? '+' : '-'; // stored as its narrow equivalent
+    __dc       = 0;
+    return 0; // 0: __ct was accepted
+  }
+  if (__grouping.size() != 0 && __ct == __thousands_sep) { // separators only count when grouping is non-empty
+    if (__g_end - __g < __num_get_buf_sz) { // assumes __g holds __num_get_buf_sz entries -- TODO confirm
+      *__g_end++ = __dc; // digits accepted since the previous separator
+      __dc       = 0;
+    }
+    return 0; // separator accepted; nothing is written to the char buffer
+  }
+  ptrdiff_t __f = __atoms_offset(__atoms, __ct); // presumably the index of __ct in __atoms; helper not shown here
+  if (__f >= 24) // 24 and 25 are the sign atoms handled above; any index this large is rejected
+    return -1; // -1: __ct is not part of the number
+  switch (__base) {
+  case 8:
+  case 10:
+    if (__f >= __base) // presumably atom index == digit value for the first atoms -- verify against __src
+      return -1;
+    break;
+  case 16:
+    if (__f < 22) // presumably atoms 0-21 are the hex digits -- verify against __src
+      break;
+    if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') { // atoms 22/23 need a '0' within first 2 chars
+      __dc       = 0; // digit count restarts after this character
+      *__a_end++ = __src[__f];
+      return 0;
+    }
+    return -1;
+  }
+  *__a_end++ = __src[__f]; // append the narrow character for this atom
+  ++__dc;
+  return 0;
+}
+
 template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<char>;
 _LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<wchar_t>;)
 
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
index 015408294bc8c..a110aae2db11b 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
@@ -13,6 +13,8 @@
 // iter_type get(iter_type in, iter_type end, ios_base&,
 //               ios_base::iostate& err, long& v) const;
 
+// XFAIL: FROZEN-CXX03-HEADERS-FIXME
+
 #include <locale>
 #include <ios>
 #include <cassert>
@@ -98,6 +100,18 @@ int main(int, char**)
         assert(err == ios.goodbit);
         assert(v == 291);
     }
+    {
+        const char str[] = "a123";
+        std::dec(ios);
+        std::ios_base::iostate err = ios.goodbit;
+        cpp17_input_iterator<const char*> iter =
+            f.get(cpp17_input_iterator<const char*>(str),
+                  cpp17_input_iterator<const char*>(str+sizeof(str)),
+                  ios, err, v);
+        assert(base(iter) == str);
+        assert(err == ios.failbit);
+        assert(v == 0);
+    }
     {
         const char str[] = "0x123";
         std::hex(ios);
@@ -519,6 +533,142 @@ int main(int, char**)
         assert(err == ios.failbit);
         assert(v == std::numeric_limits<long>::max());
     }
+    {
+      v                          = -1;
+      const char str[]           = "";
+      std::ios_base::iostate err = ios.goodbit;
+
+      cpp17_input_iterator<const char*> iter =
+          f.get(cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str), ios, err, v);
+      assert(base(iter) == str);
+      assert(err == (std::ios::eofbit | std::ios::failbit));
+      assert(v == 0);
+    }
+    {
+      v                          = -1;
+      const char str[]           = "+";
+      std::ios_base::iostate err = ios.goodbit;
+
+      cpp17_input_iterator<const char*> iter =
+          f.get(cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + 1), ios, err, v);
+      assert(base(iter) == str + 1);
+      assert(err == (std::ios::eofbit | std::ios::failbit));
+      assert(v == 0);
+    }
+    {
+      v                          = -1;
+      const char str[]           = "+";
+      std::ios_base::iostate err = ios.goodbit;
+
+      cpp17_input_iterator<const char*> iter = f.get(
+          cpp17_input_iterator<const char*>(std::begin(str)),
+          cpp17_input_iterator<const char*>(std::end(str)),
+          ios,
+          err,
+          v);
+      assert(base(iter) == str + 1);
+      assert(err == ios.failbit);
+      assert(v == 0);
+    }
+    {
+      v                          = -1;
+      const char str[]           = "-";
+      std::ios_base::iostate err = ios.goodbit;
+
+      cpp17_input_iterator<const char*> iter = f.get(
+          cpp17_input_iterator<const char*>(std::begin(str)),
+          cpp17_input_iterator<const char*>(std::end(str)),
+          ios,
+          err,
+          v);
+      assert(base(iter) == str + 1);
+      assert(err == ios.failbit);
+      assert(v == 0);
+    }
+    {
+      v                          = -1;
+      const char str[]           = "0";
+      std::ios_base::iostate err = ios.goodbit;
+
+      cpp17_input_iterator<const char*> iter = f.get(
+          cpp17_input_iterator<const char*>(std::begin(str)),
+          cpp17_input_iterator<const char*>(std::end(str)),
+          ios,
+          err,
+          v);
+      assert(base(iter) == str + 1);
+      assert(err == ios.goodbit);
+      assert(v == 0);
+    }
+    {
+      v                          = -1;
+      const char str[]           = "078";
+      std::ios_base::iostate err = ios.goodbit;
+
+      ios.flags(ios.flags() & ~ios.basefield);
+      cpp17_input_iterator<const char*> iter = f.get(
+          cpp17_input_iterator<const char*>(std::begin(str)),
+          cpp17_input_iterator<const char*>(std::end(str)),
+          ios,
+          err,
+          v);
+      assert(base(iter) == str + 2);
+      assert(err == ios.goodbit);
+      assert(v == 7);
+      ios.flags(ios.flags() | ios.dec);
+    }
+    {
+      v                          = -1;
+      std::string str = std::to_string(std::numeric_limits<unsigned long>::max()) + "99a";
+      std::ios_base::iostate err = ios.goodbit;
+
+      cpp17_input_iterator<const char*> iter = f.get(
+          cpp17_input_iterator<const char*>(str.data()),
+          cpp17_input_iterator<const char*>(str.data() + str.size()),
+          ios,
+          err,
+          v);
+      assert(base(iter) == str.data() + str.size() - 1);
+      assert(err == ios.failbit);
+      assert(v == std::numeric_limits<long>::max());
+    }
+    {
+        std::string str = std::to_string(std::numeric_limits<long>::max()) + 'c';
+        std::ios_base::iostate err = ios.goodbit;
+        cpp17_input_iterator<const char*> iter =
+            f.get(cpp17_input_iterator<const char*>(str.data()),
+                  cpp17_input_iterator<const char*>(str.data() + str.size()),
+                  ios, err, v);
+        assert(base(iter) == str.data() + str.size() - 1);
+        assert(err == ios.goodbit);
+        assert(v == std::numeric_limits<long>::max());
+    }
+    {
+      std::string str = std::to_string(static_cast<unsigned long>(std::numeric_limits<long>::max()) + 1) + 'c';
+      std::ios_base::iostate err             = ios.goodbit;
+      cpp17_input_iterator<const char*> iter = f.get(
+          cpp17_input_iterator<const char*>(str.data()),
+          cpp17_input_iterator<const char*>(str.data() + str.size()),
+          ios,
+          err,
+          v);
+      assert(base(iter) == str.data() + str.size() - 1);
+      assert(err == ios.failbit);
+      assert(v == std::numeric_limits<long>::max());
+    }
+    {
+      std::string str = '-' + std::to_string(static_cast<unsigned long>(std::numeric_limits<long>::max()) + 2) + 'c';
+      std::ios_base::iostate err             = ios.goodbit;
+      cpp17_input_iterator<const char*> iter = f.get(
+          cpp17_input_iterator<const char*>(str.data()),
+          cpp17_input_iterator<const char*>(str.data() + str.size()),
+          ios,
+          err,
+          v);
+      assert(base(iter) == str.data() + str.size() - 1);
+      assert(err == ios.failbit);
+      assert(v == std::numeric_limits<long>::min());
+    }
 
   return 0;
 }
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp
index bee1be08baafc..f9cef08e247d0 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp
@@ -68,6 +68,17 @@ int main(int, char**)
         assert(err == ios.goodbit);
         assert(v == 1);
     }
+    {
+        const char str[] = "-1";
+        std::ios_base::iostate err = ios.goodbit;
+        cpp17_input_iterator<const char*> iter =
+            f.get(cpp17_input_iterator<const char*>(str),
+                  cpp17_input_iterator<const char*>(str+sizeof(str)),
+                  ios, err, v);
+        assert(base(iter) == str+sizeof(str)-1);
+        assert(err == ios.goodbit);
+        assert(v == std::numeric_limits<unsigned int>::max());
+    }
     std::hex(ios);
     {
         const char str[] = "0xFFFFFFFF";
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp
index b087bdcd94017..fed6fc0246d82 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp
@@ -68,6 +68,17 @@ int main(int, char**)
         assert(err == ios.goodbit);
         assert(v == 1);
     }
+    {
+        const char str[] = "-1";
+        std::ios_base::iostate err = ios.goodbit;
+        cpp17_input_iterator<const char*> iter =
+            f.get(cpp17_input_iterator<const char*>(str),
+                  cpp17_input_iterator<const char*>(str+sizeof(str)),
+                  ios, err, v);
+        assert(base(iter) == str+sizeof(str)-1);
+        assert(err == ios.goodbit);
+        assert(v == std::numeric_limits<unsigned long>::max());
+    }
     std::hex(ios);
     {
         const char str[] = "0xFFFFFFFF";
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp
index 6769aebe424e3..0bdb6c1c38606 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp
@@ -68,6 +68,17 @@ int main(int, char**)
         assert(err == ios.goodbit);
         assert(v == 1);
     }
+    {
+        const char str[] = "-1";
+        std::ios_base::iostate err = ios.goodbit;
+        cpp17_input_iterator<const char*> iter =
+            f.get(cpp17_input_iterator<const char*>(str),
+                  cpp17_input_iterator<const char*>(str+sizeof(str)),
+                  ios, err, v);
+        assert(base(iter) == str+sizeof(str)-1);
+        assert(err == ios.goodbit);
+        assert(v == std::numeric_limits<unsigned long long>::max());
+    }
     std::hex(ios);
     {
         const char str[] = "0xFFFFFFFFFFFFFFFF";
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp
index bec9a7ff9e3bb..decfbe943461f 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp
@@ -68,6 +68,17 @@ int main(int, char**)
         assert(err == ios.goodbit);
         assert(v == 1);
     }
+    {
+        const char str[] = "-1";
+        std::ios_base::iostate err = ios.goodbit;
+        cpp17_input_iterator<const char*> iter =
+            f.get(cpp17_input_iterator<const char*>(str),
+                  cpp17_input_iterator<const char*>(str+sizeof(str)),
+                  ios, err, v);
+        assert(base(iter) == str+sizeof(str)-1);
+        assert(err == ios.goodbit);
+        assert(v == std::numeric_limits<unsigned short>::max());
+    }
     std::hex(ios);
     {
         const char str[] = "0xFFFF";

From bb78728826ff57f3df859e79bfd857b5a175bb6d Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Mon, 24 Nov 2025 15:55:19 +0000
Subject: [PATCH 03/37] Reland "RegisterCoalescer: Add implicit-def of super
 register when coalescing SUBREG_TO_REG"

A SUBREG_TO_REG instruction expresses that the top bits of the result
register are set to a certain value (e.g. 0).

The example below expresses that %1 will have its top 32 bits zeroed and
its lower 32 bits equal to the result of INSTR.
```
    %0:gpr32 = INSTR
    %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
```
When the RegisterCoalescer tries to remove SUBREG_TO_REG instructions by
coalescing %0 into %1, it must keep the same semantics. Currently
however, the RegisterCoalescer would emit:
```
    %1.sub32:gpr64 = INSTR
```
which no longer expresses that the top 32 bits of the register are
defined (zeroed) by INSTR.

This may cause issues with e.g. machine copy propagation where the pass
may think it can remove a COPY-like instruction because the MIR says
only the bottom 32 bits are defined/used, even though other uses of the
register rely on the top 32 bits being zeroed by the COPY-like
instruction.

This PR changes the RegisterCoalescer to instead emit:
```
    undef %1.sub32:gpr64 = INSTR, implicit-def %1
```
to express that the entire contents of %1:gpr64 are defined by the
instruction.

This tries to reland #134408 which had to be reverted due to a few reported
failures.
---
 llvm/lib/CodeGen/RegisterCoalescer.cpp        | 185 ++++++-
 llvm/lib/CodeGen/SplitKit.cpp                 |   2 +-
 .../AArch64/GlobalISel/arm64-pcsections.ll    |  64 +--
 .../implicit-def-subreg-to-reg-regression.ll  |   4 +-
 llvm/test/CodeGen/AArch64/pr151592.mir        | 168 +++++++
 llvm/test/CodeGen/AArch64/pr151888.mir        |  17 +
 llvm/test/CodeGen/AArch64/pr164181-reduced.ll | 183 +++++++
 .../AArch64/preserve_nonecc_varargs_darwin.ll |  10 +-
 ...er-coalesce-implicit-def-subreg-to-reg.mir |  45 ++
 ...gister-coalesce-update-subranges-remat.mir | 161 ++++++-
 .../CodeGen/LoongArch/lasx/build-vector.ll    |  14 +-
 llvm/test/CodeGen/LoongArch/lasx/fpowi.ll     |  16 +-
 .../LoongArch/lasx/scalar-to-vector.ll        |   4 +-
 .../CodeGen/PowerPC/aix-vec_insert_elt.ll     |   4 +
 .../CodeGen/PowerPC/build-vector-tests.ll     |  48 ++
 .../PowerPC/canonical-merge-shuffles.ll       |   6 +
 llvm/test/CodeGen/PowerPC/combine-fneg.ll     |   1 +
 llvm/test/CodeGen/PowerPC/fp-strict-round.ll  |   6 +
 llvm/test/CodeGen/PowerPC/frem.ll             |   3 +
 .../PowerPC/froundeven-legalization.ll        |   8 +
 llvm/test/CodeGen/PowerPC/half.ll             |   1 +
 llvm/test/CodeGen/PowerPC/ldexp.ll            |   2 +
 llvm/test/CodeGen/PowerPC/llvm.modf.ll        |   1 +
 llvm/test/CodeGen/PowerPC/vec_insert_elt.ll   |   4 +
 .../vector-constrained-fp-intrinsics.ll       | 176 +++++++
 ...coalescer-breaks-subreg-to-reg-liveness.ll | 185 +++++++
 ...icit-def-regression-imp-operand-assert.mir |   6 +-
 ...subreg-to-reg-requires-subrange-update.mir |  44 ++
 llvm/test/CodeGen/X86/pr76416.ll              |  79 +++
 llvm/test/CodeGen/X86/subreg-fail.mir         |   4 +-
 .../CodeGen/X86/subreg-to-reg-coalescing.mir  | 451 ++++++++++++++++++
 31 files changed, 1823 insertions(+), 79 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/pr151592.mir
 create mode 100644 llvm/test/CodeGen/AArch64/pr151888.mir
 create mode 100644 llvm/test/CodeGen/AArch64/pr164181-reduced.ll
 create mode 100644 llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
 create mode 100644 llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
 create mode 100644 llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
 create mode 100644 llvm/test/CodeGen/X86/pr76416.ll
 create mode 100644 llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index e624088a0964e..ce933c5f1be85 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -307,7 +307,13 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
   /// number if it is not zero. If DstReg is a physical register and the
   /// existing subregister number of the def / use being updated is not zero,
   /// make sure to set it to the correct physical subregister.
-  void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
+  ///
+  /// If \p SubregToRegSrcInsts is not empty, we are coalescing a
+  /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
+  /// implicit-def of DstReg on instructions that define SrcReg.
+  void updateRegDefsUses(
+      Register SrcReg, Register DstReg, unsigned SubIdx,
+      SmallPtrSetImpl<MachineInstr *> *SubregToRegSrcInsts = nullptr);
 
   /// If the given machine operand reads only undefined lanes add an undef
   /// flag.
@@ -1444,6 +1450,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
 
   // CopyMI may have implicit operands, save them so that we can transfer them
   // over to the newly materialized instruction after CopyMI is removed.
+  LaneBitmask NewMIImplicitOpsMask;
   SmallVector<MachineOperand, 4> ImplicitOps;
   ImplicitOps.reserve(CopyMI->getNumOperands() -
                       CopyMI->getDesc().getNumOperands());
@@ -1458,6 +1465,9 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
               (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
              "unexpected implicit virtual register def");
       ImplicitOps.push_back(MO);
+      if (MO.isDef() && MO.getReg().isVirtual() &&
+          MRI->shouldTrackSubRegLiveness(DstReg))
+        NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
     }
   }
 
@@ -1494,14 +1504,11 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
       } else {
         assert(MO.getReg() == NewMI.getOperand(0).getReg());
 
-        // We're only expecting another def of the main output, so the range
-        // should get updated with the regular output range.
-        //
-        // FIXME: The range updating below probably needs updating to look at
-        // the super register if subranges are tracked.
-        assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
-               "subrange update for implicit-def of super register may not be "
-               "properly handled");
+        // If lanemasks need to be tracked, compile the lanemask of the NewMI
+        // implicit def operands to avoid subranges for the super-regs from
+        // being removed by code later on in this function.
+        if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
+          NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
       }
     }
   }
@@ -1617,7 +1624,8 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
           *LIS->getSlotIndexes(), *TRI);
 
       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
-        if ((SR.LaneMask & DstMask).none()) {
+        if ((SR.LaneMask & DstMask).none() &&
+            (SR.LaneMask & NewMIImplicitOpsMask).none()) {
           LLVM_DEBUG(dbgs()
                      << "Removing undefined SubRange "
                      << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1891,11 +1899,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
   }
 }
 
-void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
-                                          unsigned SubIdx) {
+void RegisterCoalescer::updateRegDefsUses(
+    Register SrcReg, Register DstReg, unsigned SubIdx,
+    SmallPtrSetImpl<MachineInstr *> *SubregToRegSrcInsts) {
   bool DstIsPhys = DstReg.isPhysical();
   LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
 
+  // Coalescing a COPY may expose reads of 'undef' subregisters.
+  // If so, then explicitly propagate 'undef' to those operands.
   if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
     for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
       if (MO.isUndef())
@@ -1912,6 +1923,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
     }
   }
 
+  // If DstInt already has a subrange for the unused lanes, then we shouldn't
+  // create duplicate subranges when we update the interval for unused lanes.
+  LaneBitmask DstIntLaneMask;
+  if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+    for (LiveInterval::SubRange &SR : DstInt->subranges())
+      DstIntLaneMask |= SR.LaneMask;
+  }
+
+  // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
   SmallPtrSet<MachineInstr *, 8> Visited;
   for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
                                                E = MRI->reg_instr_end();
@@ -1935,6 +1955,82 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
     if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
       Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
 
+    bool RequiresImplicitRedef = false;
+    if (SubregToRegSrcInsts && !SubregToRegSrcInsts->empty()) {
+      // We can only add an implicit-def and undef if the sub registers match,
+      // e.g.
+      //  %0:gr32      = INSTX
+      //  %0.sub8:gr32 = INSTY           // top 24 bits of %0 still defined
+      //  %1:gr64      = SUBREG_TO_REG 0, %0, %subreg.sub32
+      //
+      // This cannot be transformed into:
+      //  %1.sub32:gr64      = INSTX
+      //  undef %1.sub8:gr64 = INSTY , implicit-def %1
+      //
+      // because the undef means that none of the bits of %1 are read, thus
+      // clobbering the top 24 bits of %1.sub32.
+      if (SubregToRegSrcInsts->contains(UseMI) &&
+          all_of(UseMI->all_defs(),
+                 [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
+                   if (MO.getReg() != SrcReg) // Ignore unrelated registers
+                     return true;
+                   return MO.isUndef() ||
+                          (SubIdx &&
+                           (!MO.getSubReg() || SubIdx == MO.getSubReg()));
+                 })) {
+        // Add implicit-def of super-register to express that the whole
+        // register is defined by the instruction.
+        UseMI->addRegisterDefined(DstReg);
+        RequiresImplicitRedef = true;
+      }
+
+      // If the coalesced instruction doesn't fully define the register, we need
+      // to preserve the original super register liveness for SUBREG_TO_REG.
+      //
+      // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
+      // but it introduces liveness for other subregisters. Downstream users may
+      // have been relying on those bits, so we need to ensure their liveness is
+      // captured with a def of other lanes.
+      if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+        // First check if there is sufficient granularity in terms of subranges.
+        LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+        LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+        LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
+        if ((UnusedLanes & ~DstIntLaneMask).any()) {
+          BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+          DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
+          DstIntLaneMask |= UnusedLanes;
+        }
+
+        // After duplicating the live ranges for the low/hi bits, we
+        // need to update the subranges of the DstReg interval such that
+        // for a case like this:
+        //
+        //       entry:
+        //  16B    %1:gpr32 = INSTRUCTION    (<=> UseMI)
+        //            :
+        //       if.then:
+        //  32B    %1:gpr32 = MOVIMM32 ..
+        //  48B    %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
+        //
+        //  Only the MOVIMM32 requires a def of the top lanes and any intervals
+        //  for the top 32-bits of the def at 16B should be removed.
+        for (LiveInterval::SubRange &SR : DstInt->subranges()) {
+          if (!Writes || RequiresImplicitRedef ||
+              (SR.LaneMask & UnusedLanes).none())
+            continue;
+
+          assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
+                 "Unexpected lanemask. Subrange needs finer granularity");
+
+          SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot();
+          auto SegmentI = SR.find(UseIdx);
+          if (SegmentI != SR.end())
+            SR.removeSegment(SegmentI, true);
+        }
+      }
+    }
+
     // Replace SrcReg with DstReg in all UseMI operands.
     for (unsigned Op : Ops) {
       MachineOperand &MO = UseMI->getOperand(Op);
@@ -1943,7 +2039,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
       // turn a full def into a read-modify-write sub-register def and vice
       // versa.
       if (SubIdx && MO.isDef())
-        MO.setIsUndef(!Reads);
+        MO.setIsUndef(!Reads || RequiresImplicitRedef);
 
       // A subreg use of a partially undef (super) register may be a complete
       // undef use now and then has to be marked that way.
@@ -2046,6 +2142,38 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
   LIS->shrinkToUses(&LI);
 }
 
+/// Collect into \p Instrs the def of value \p Idx, or, if it is a PHI def, all
+/// defs reaching it via preceding blocks. Returns true if any def was found.
+static bool findPrecedingDefs(SmallPtrSetImpl<MachineInstr *> &Instrs,
+                              LiveIntervals *LIS, LiveInterval &SrcInt,
+                              MachineBasicBlock *MBB, VNInfo *Idx) {
+  auto IsPrecedingDef = [&](VNInfo *Idx) -> bool {
+    if (Idx->isPHIDef())
+      return false;
+    MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
+    assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
+    Instrs.insert(Def);
+    return true;
+  };
+
+  if (IsPrecedingDef(Idx))
+    return true;
+
+  SmallVector<MachineBasicBlock *> Worklist(MBB->pred_begin(), MBB->pred_end());
+  SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
+  while (!Worklist.empty()) {
+    MachineBasicBlock *MBB = Worklist.pop_back_val();
+    auto [_, Inserted] = VisitedBlocks.insert(MBB);
+    if (!Inserted)
+      continue;
+    VNInfo *Idx = SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(MBB));
+    if (!IsPrecedingDef(Idx))
+      Worklist.append(MBB->pred_begin(), MBB->pred_end());
+  }
+
+  return !Instrs.empty();
+}
+
 bool RegisterCoalescer::joinCopy(
     MachineInstr *CopyMI, bool &Again,
     SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2183,6 +2311,34 @@ bool RegisterCoalescer::joinCopy(
     });
   }
 
+  SmallPtrSet<MachineInstr *, 4> SubregToRegSrcInsts;
+  Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+  if (CopyMI->isSubregToReg() && !SrcReg.isPhysical()) {
+    // For the case where the copy instruction is a SUBREG_TO_REG, e.g.
+    //
+    //   %0:gpr32 = movimm32 ..
+    //   %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
+    //   ...
+    //   %0:gpr32 = COPY <something>
+    //
+    // After joining liveranges, the original `movimm32` will need an
+    // implicit-def to make it explicit that the entire register is written,
+    // i.e.
+    //
+    //   undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
+    //   ...
+    //   undef %0.sub32:gpr64 = COPY <something>  // Note that this does not
+    //                                            // require an implicit-def,
+    //                                            // because it has nothing to
+    //                                            // do with the SUBREG_TO_REG.
+    LiveInterval &SrcInt = LIS->getInterval(SrcReg);
+    SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
+    if (!findPrecedingDefs(SubregToRegSrcInsts, LIS, SrcInt,
+                           CopyMI->getParent(),
+                           SrcInt.Query(SubregToRegSlotIdx).valueIn()))
+      llvm_unreachable("SUBREG_TO_REG src requires a def");
+  }
+
   ShrinkMask = LaneBitmask::getNone();
   ShrinkMainRange = false;
 
@@ -2253,7 +2409,8 @@ bool RegisterCoalescer::joinCopy(
   // Also update DstReg operands to include DstIdx if it is set.
   if (CP.getDstIdx())
     updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
-  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
+                    &SubregToRegSrcInsts);
 
   // Shrink subregister ranges if necessary.
   if (ShrinkMask.any()) {
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 8ec4bfbb5a330..cf064b90a7d34 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -448,7 +448,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
     const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def);
     assert(DefMI != nullptr);
     LaneBitmask LM;
-    for (const MachineOperand &DefOp : DefMI->defs()) {
+    for (const MachineOperand &DefOp : DefMI->all_defs()) {
       Register R = DefOp.getReg();
       if (R != LI.reg())
         continue;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index 57481724936a3..cab2741be9929 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -12,7 +12,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -46,13 +46,13 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w1, $x0, $x2
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew)
+  ; CHECK-NEXT:   renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
   ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $x0, $x9
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -91,7 +91,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -243,7 +243,7 @@ define i32 @fetch_and_nand(ptr %p) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w8, 2, pcsections !0
   ; CHECK-NEXT:   $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -295,7 +295,7 @@ define i32 @fetch_and_or(ptr %p) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
@@ -726,7 +726,7 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -750,7 +750,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -773,7 +773,7 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -797,7 +797,7 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -821,7 +821,7 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -845,7 +845,7 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -869,7 +869,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
@@ -895,7 +895,7 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
@@ -923,10 +923,10 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -951,10 +951,10 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -977,7 +977,7 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1001,7 +1001,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -1024,7 +1024,7 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1048,7 +1048,7 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1072,7 +1072,7 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1096,7 +1096,7 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1120,7 +1120,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
@@ -1146,7 +1146,7 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
@@ -1174,10 +1174,10 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -1202,10 +1202,10 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -1230,7 +1230,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
   ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
@@ -1273,7 +1273,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
   ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 50fac819d4afe..e37d8f7da4bfc 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s
+; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true  < %s | FileCheck %s
 
 ; Check there's no assert in spilling from implicit-def operands on an
 ; IMPLICIT_DEF.
@@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
 ; CHECK-NEXT:    ldr x8, [sp, #40] ; 8-byte Reload
 ; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    mov x1, xzr
-; CHECK-NEXT:    ; kill: def $w8 killed $w8 killed $x8 def $x8
 ; CHECK-NEXT:    str x8, [sp]
 ; CHECK-NEXT:    bl _fprintf
 ; CHECK-NEXT:    brk #0x1
diff --git a/llvm/test/CodeGen/AArch64/pr151592.mir b/llvm/test/CodeGen/AArch64/pr151592.mir
new file mode 100644
index 0000000000000..dbcc1f8c08e9a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr151592.mir
@@ -0,0 +1,168 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -run-pass=register-coalescer -o - %s | FileCheck %s
+---
+name:            reproducer
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: reproducer
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK-NEXT:   undef [[MOVIv2d_ns:%[0-9]+]].qsub1:zpr2 = MOVIv2d_ns 0, implicit-def [[MOVIv2d_ns]]
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64sp = COPY $xzr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   TBNZW [[COPY1]], 0, %bb.3
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[MOVIv2d_ns:%[0-9]+]].dsub:zpr2 = LDRDui [[COPY2]], 0, implicit-def [[MOVIv2d_ns]].zsub
+  ; CHECK-NEXT:   ST2Twov2d [[MOVIv2d_ns]].zsub_qsub1, [[COPY]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 1
+  ; CHECK-NEXT:   STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 0
+  ; CHECK-NEXT:   B %bb.1
+  bb.0:
+    liveins: $w0, $x1
+    %0:gpr64common = COPY $x1
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY %1:gpr32
+    undef %8.qsub1:qq = MOVIv2d_ns 0
+    %4:zpr = SUBREG_TO_REG 0, %8.qsub1:qq, %subreg.zsub
+    %5:gpr64sp = COPY $xzr
+
+  bb.1:
+    TBNZW %2:gpr32, 0, %bb.3
+    B %bb.2
+
+  bb.2:
+    %8.dsub:qq = LDRDui %5:gpr64sp, 0, implicit-def %8.qsub0:qq
+    ST2Twov2d %8:qq, %0:gpr64common
+
+  bb.3:
+    STR_ZXI %4:zpr, %0:gpr64common, 1
+    STR_ZXI %4:zpr, %0:gpr64common, 0
+    B %bb.1
+...
+---
+name:            reproducer2
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: reproducer2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK-NEXT:   undef [[MOVIv2d_ns:%[0-9]+]].zsub:zpr2 = MOVIv2d_ns 0
+  ; CHECK-NEXT:   [[MOVIv2d_ns:%[0-9]+]].qsub1:zpr2 = MOVIv2d_ns 0
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64sp = COPY $xzr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   TBNZW [[COPY1]], 0, %bb.3
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[MOVIv2d_ns:%[0-9]+]].dsub:zpr2 = LDRDui [[COPY2]], 0, implicit-def [[MOVIv2d_ns]].zsub
+  ; CHECK-NEXT:   ST2Twov2d [[MOVIv2d_ns]].zsub_qsub1, [[COPY]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 1
+  ; CHECK-NEXT:   STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 0
+  ; CHECK-NEXT:   B %bb.1
+  bb.0:
+    liveins: $w0, $x1
+    %0:gpr64common = COPY $x1
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY %1:gpr32
+    undef %8.qsub0:qq = MOVIv2d_ns 0
+    %8.qsub1:qq = MOVIv2d_ns 0
+    %4:zpr = SUBREG_TO_REG 0, %8.qsub1:qq, %subreg.zsub
+    %5:gpr64sp = COPY $xzr
+
+  bb.1:
+    TBNZW %2:gpr32, 0, %bb.3
+    B %bb.2
+
+  bb.2:
+    %8.dsub:qq = LDRDui %5:gpr64sp, 0, implicit-def %8.qsub0:qq
+    ST2Twov2d %8:qq, %0:gpr64common
+
+  bb.3:
+    STR_ZXI %4:zpr, %0:gpr64common, 1
+    STR_ZXI %4:zpr, %0:gpr64common, 0
+    B %bb.1
+...
+---
+name:            reproducer3
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: reproducer3
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK-NEXT:   undef [[MOVIv2d_ns:%[0-9]+]].qsub1:zpr2 = MOVIv2d_ns 0
+  ; CHECK-NEXT:   [[MOVIv2d_ns:%[0-9]+]].zsub:zpr2 = MOVIv2d_ns 0
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64sp = COPY $xzr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   TBNZW [[COPY1]], 0, %bb.3
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[MOVIv2d_ns:%[0-9]+]].dsub1:zpr2 = LDRDui [[COPY2]], 0, implicit-def [[MOVIv2d_ns]].qsub1
+  ; CHECK-NEXT:   ST2Twov2d [[MOVIv2d_ns]].zsub_qsub1, [[COPY]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   STR_ZXI [[MOVIv2d_ns]].zsub0, [[COPY]], 1
+  ; CHECK-NEXT:   STR_ZXI [[MOVIv2d_ns]].zsub0, [[COPY]], 0
+  ; CHECK-NEXT:   B %bb.1
+  bb.0:
+    liveins: $w0, $x1
+    %0:gpr64common = COPY $x1
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY %1:gpr32
+    undef %8.qsub1:qq = MOVIv2d_ns 0
+    %8.qsub0:qq = MOVIv2d_ns 0
+    %4:zpr = SUBREG_TO_REG 0, %8.qsub0:qq, %subreg.zsub
+    %5:gpr64sp = COPY $xzr
+
+  bb.1:
+    TBNZW %2:gpr32, 0, %bb.3
+    B %bb.2
+
+  bb.2:
+    %8.dsub1:qq = LDRDui %5:gpr64sp, 0, implicit-def %8.qsub1:qq
+    ST2Twov2d %8:qq, %0:gpr64common
+
+  bb.3:
+    STR_ZXI %4:zpr, %0:gpr64common, 1
+    STR_ZXI %4:zpr, %0:gpr64common, 0
+    B %bb.1
+...
diff --git a/llvm/test/CodeGen/AArch64/pr151888.mir b/llvm/test/CodeGen/AArch64/pr151888.mir
new file mode 100644
index 0000000000000..5b66f136cbc4f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr151888.mir
@@ -0,0 +1,17 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -run-pass=register-coalescer -o - %s | FileCheck %s
+---
+name:            reproducer
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: reproducer
+    ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri $xzr, 0, 31
+    ; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %1:gpr32 = COPY killed $wzr
+    %2:gpr64 = SUBREG_TO_REG 0, %1:gpr32, %subreg.sub_32
+    %3:gpr64 = UBFMXri %2:gpr64, 0, 31
+    $x0 = COPY %3:gpr64
+    RET_ReallyLR implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/pr164181-reduced.ll b/llvm/test/CodeGen/AArch64/pr164181-reduced.ll
new file mode 100644
index 0000000000000..192893e6a08cc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr164181-reduced.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+;
+; This is a reduced version of pr164181.ll, which failed with subreg liveness enabled
+; after adding the implicit-def for a SUBREG_TO_REG to mark the top 32 bits of a register
+; as being written by a MOVi32imm instruction. This previously failed the machine verifier
+; because the live range for the top 32 bits wasn't updated when rematerializing the
+; MOVi32imm.
+;
+; RUN: llc -verify-machineinstrs -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -enable-subreg-liveness=false < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @f(i1 %var_0, i64 %var_2, i64 %var_11, ptr %arr_3, ptr %arr_4, ptr %arr_7, ptr %arr_13, ptr %invariant.gep875.us, ptr %arrayidx384.us, i16 %0, i1 %tobool435.not.us, ptr %gep876.us, i16 %cond464.in.us, ptr %1, i16 %conv227.us, i1 %cmp378.us) #0 {
+; CHECK-LABEL: f:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w23, -40
+; CHECK-NEXT:    .cfi_offset w24, -48
+; CHECK-NEXT:    .cfi_offset w25, -56
+; CHECK-NEXT:    .cfi_offset w26, -64
+; CHECK-NEXT:    .cfi_offset w27, -72
+; CHECK-NEXT:    .cfi_offset w28, -80
+; CHECK-NEXT:    .cfi_offset w30, -96
+; CHECK-NEXT:    ldrb w9, [sp, #152]
+; CHECK-NEXT:    ldrh w10, [sp, #144]
+; CHECK-NEXT:    mov x19, #-18403 // =0xffffffffffffb81d
+; CHECK-NEXT:    ldr x11, [sp, #136]
+; CHECK-NEXT:    ldrh w12, [sp, #128]
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    ldr x13, [sp, #120]
+; CHECK-NEXT:    ldrb w14, [sp, #112]
+; CHECK-NEXT:    mov w15, wzr
+; CHECK-NEXT:    ldrh w16, [sp, #104]
+; CHECK-NEXT:    ldr x17, [sp, #96]
+; CHECK-NEXT:    mov w18, #149 // =0x95
+; CHECK-NEXT:    movk x19, #58909, lsl #16
+; CHECK-NEXT:    mov w20, #-18978 // =0xffffb5de
+; CHECK-NEXT:    mov w21, #1 // =0x1
+; CHECK-NEXT:    mov w22, #-7680 // =0xffffe200
+; CHECK-NEXT:    mov w23, #36006 // =0x8ca6
+; CHECK-NEXT:    mov x25, xzr
+; CHECK-NEXT:    mov x24, xzr
+; CHECK-NEXT:  .LBB0_1: // %for.body99.us
+; CHECK-NEXT:    // =>This Loop Header: Depth=1
+; CHECK-NEXT:    // Child Loop BB0_4 Depth 2
+; CHECK-NEXT:    // Child Loop BB0_10 Depth 2
+; CHECK-NEXT:    mov w27, w15
+; CHECK-NEXT:    mov x26, x25
+; CHECK-NEXT:    mov x28, x24
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  .LBB0_2: // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT:    mov w25, #1 // =0x1
+; CHECK-NEXT:  .LBB0_3: // %for.inc371.us
+; CHECK-NEXT:    // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT:    mul w27, w15, w20
+; CHECK-NEXT:    mov x28, xzr
+; CHECK-NEXT:    mov x26, x2
+; CHECK-NEXT:    tbz w0, #0, .LBB0_9
+; CHECK-NEXT:  .LBB0_4: // %for.body194.us
+; CHECK-NEXT:    // Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    // => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    orr x15, x28, x19
+; CHECK-NEXT:    mov x24, x28
+; CHECK-NEXT:    strh w10, [x13]
+; CHECK-NEXT:    strb w18, [x5]
+; CHECK-NEXT:    str x15, [x4]
+; CHECK-NEXT:    mov w15, w27
+; CHECK-NEXT:    str x8, [x11]
+; CHECK-NEXT:    str x1, [x3]
+; CHECK-NEXT:    tbz w14, #0, .LBB0_2
+; CHECK-NEXT:  // %bb.5: // %if.then327.us
+; CHECK-NEXT:    // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT:    cbnz w21, .LBB0_7
+; CHECK-NEXT:  // %bb.6: // %cond.true331.us
+; CHECK-NEXT:    // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT:    ldrsb w27, [x8]
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_7: // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT:    mov w27, wzr
+; CHECK-NEXT:  .LBB0_8: // %cond.end345.us
+; CHECK-NEXT:    // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT:    mov x25, xzr
+; CHECK-NEXT:    strh w27, [x3]
+; CHECK-NEXT:    str x26, [x6]
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:  .LBB0_9: // %for.cond376.preheader.us
+; CHECK-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    mov x26, xzr
+; CHECK-NEXT:    mov w27, wzr
+; CHECK-NEXT:  .LBB0_10: // %for.body380.us
+; CHECK-NEXT:    // Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    // => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    ands w28, w0, #0x1
+; CHECK-NEXT:    orr x26, x26, #0x1
+; CHECK-NEXT:    strh w16, [x7]
+; CHECK-NEXT:    csel w30, w23, w22, ne
+; CHECK-NEXT:    tst w12, #0xffff
+; CHECK-NEXT:    csel w21, wzr, w27, eq
+; CHECK-NEXT:    cmp w28, #0
+; CHECK-NEXT:    str w30, [x17]
+; CHECK-NEXT:    csel w27, w27, w21, ne
+; CHECK-NEXT:    tbnz w9, #0, .LBB0_10
+; CHECK-NEXT:  // %bb.11: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    mov w21, #1 // =0x1
+; CHECK-NEXT:    b .LBB0_1
+entry:
+  br label %for.body99.us
+
+for.body99.us:                                    ; preds = %for.inc505.us, %entry
+  %mul287985.us = phi i16 [ 0, %entry ], [ %mul287986.us, %for.inc505.us ]
+  %mul354905.us = phi i64 [ 0, %entry ], [ %mul354907.us, %for.inc505.us ]
+  %sub283896.us = phi i64 [ 0, %entry ], [ %sub283897.us, %for.inc505.us ]
+  %conv96880.us4 = phi i64 [ 0, %entry ], [ 0, %for.inc505.us ]
+  br label %for.body194.us
+
+for.body380.us:                                   ; preds = %for.cond376.preheader.us, %for.inc505.us
+  %indvars.iv10181 = phi i64 [ 0, %for.cond376.preheader.us ], [ %indvars.iv.next1019, %for.inc505.us ]
+  %2 = phi i8 [ 0, %for.cond376.preheader.us ], [ %3, %for.inc505.us ]
+  store i16 %0, ptr %invariant.gep875.us, align 2
+  %arrayidx416.us = getelementptr i16, ptr %arr_13, i64 %indvars.iv10181
+  %conv419.us = select i1 %var_0, i32 36006, i32 -7680
+  store i32 %conv419.us, ptr %arrayidx384.us, align 4
+  br i1 %var_0, label %for.inc505.us, label %if.then436.us
+
+if.then436.us:                                    ; preds = %for.body380.us
+  %cond464.in.us6 = load i16, ptr null, align 2
+  %tobool465.not.us = icmp eq i16 %cond464.in.us, 0
+  %spec.select = select i1 %tobool465.not.us, i8 0, i8 %2
+  br label %for.inc505.us
+
+for.inc505.us:                                    ; preds = %if.then436.us, %for.body380.us
+  %3 = phi i8 [ %2, %for.body380.us ], [ %spec.select, %if.then436.us ]
+  %indvars.iv.next1019 = or i64 %indvars.iv10181, 1
+  br i1 %cmp378.us, label %for.body380.us, label %for.body99.us
+
+for.body194.us:                                   ; preds = %for.inc371.us, %for.body99.us
+  %mul287986.us = phi i16 [ %mul287985.us, %for.body99.us ], [ %mul287.us, %for.inc371.us ]
+  %mul354906.us = phi i64 [ %mul354905.us, %for.body99.us ], [ %var_11, %for.inc371.us ]
+  %sub283897.us = phi i64 [ %sub283896.us, %for.body99.us ], [ 0, %for.inc371.us ]
+  store i16 %conv227.us, ptr %gep876.us, align 2
+  store i8 -107, ptr %arr_7, align 1
+  %sub283.us = or i64 %sub283897.us, -434259939
+  store i64 %sub283.us, ptr %arr_4, align 8
+  %mul287.us = mul i16 %mul287986.us, -18978
+  store i64 0, ptr %1, align 8
+  store i64 %var_2, ptr %arr_3, align 8
+  br i1 %tobool435.not.us, label %if.then327.us, label %for.inc371.us
+
+if.then327.us:                                    ; preds = %for.body194.us
+  %tobool330.not.us = icmp eq i32 0, 0
+  br i1 %tobool330.not.us, label %cond.end345.us, label %cond.true331.us
+
+cond.true331.us:                                  ; preds = %if.then327.us
+  %4 = load i8, ptr null, align 1
+  %5 = sext i8 %4 to i16
+  br label %cond.end345.us
+
+cond.end345.us:                                   ; preds = %cond.true331.us, %if.then327.us
+  %cond346.us = phi i16 [ %5, %cond.true331.us ], [ 0, %if.then327.us ]
+  store i16 %cond346.us, ptr %arr_3, align 2
+  store i64 %mul354906.us, ptr %arr_13, align 8
+  br label %for.inc371.us
+
+for.inc371.us:                                    ; preds = %cond.end345.us, %for.body194.us
+  %mul354907.us = phi i64 [ 1, %for.body194.us ], [ 0, %cond.end345.us ]
+  br i1 %var_0, label %for.body194.us, label %for.cond376.preheader.us
+
+for.cond376.preheader.us:                         ; preds = %for.inc371.us
+  %arrayidx384.us9 = getelementptr i16, ptr null, i64 %conv96880.us4
+  br label %for.body380.us
+}
+
+attributes #0 = { "frame-pointer"="non-leaf" }
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index 2a77d4dd33fe5..4206c0bc26991 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp {
 ; CHECK-NEXT:    sub sp, sp, #208
 ; CHECK-NEXT:    mov w8, #10 ; =0xa
 ; CHECK-NEXT:    mov w9, #9 ; =0x9
-; CHECK-NEXT:    mov w10, #8 ; =0x8
+; CHECK-NEXT:    mov w0, #1 ; =0x1
 ; CHECK-NEXT:    stp x9, x8, [sp, #24]
-; CHECK-NEXT:    mov w8, #7 ; =0x7
+; CHECK-NEXT:    mov w8, #8 ; =0x8
 ; CHECK-NEXT:    mov w9, #6 ; =0x6
-; CHECK-NEXT:    mov w0, #1 ; =0x1
+; CHECK-NEXT:    str x8, [sp, #16]
+; CHECK-NEXT:    mov w8, #7 ; =0x7
 ; CHECK-NEXT:    mov w1, #2 ; =0x2
 ; CHECK-NEXT:    mov w2, #3 ; =0x3
 ; CHECK-NEXT:    mov w3, #4 ; =0x4
@@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp {
 ; CHECK-NEXT:    stp x22, x21, [sp, #160] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #176] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #192] ; 16-byte Folded Spill
-; CHECK-NEXT:    stp x8, x10, [sp, #8]
-; CHECK-NEXT:    str x9, [sp]
+; CHECK-NEXT:    stp x9, x8, [sp]
 ; CHECK-NEXT:    bl _callee
 ; CHECK-NEXT:    ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
new file mode 100644
index 0000000000000..a58a23068896b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
@@ -0,0 +1,45 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s
+---
+name: test
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x1
+    ; CHECK-LABEL: name: test
+    ; CHECK: liveins: $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $x0 = COPY $x1
+    ; CHECK-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1
+    ; CHECK-NEXT: RET_ReallyLR implicit $x1, implicit $x0
+    %190:gpr64 = COPY killed $x1
+    %191:gpr32 = COPY %190.sub_32:gpr64
+    %192:gpr32 = ORRWrr $wzr, killed %191:gpr32
+    %193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32
+    $x0 = COPY killed %190:gpr64
+    $x1 = COPY killed %193:gpr64all
+    RET_ReallyLR implicit $x1, implicit $x0
+...
+
+# In this test, we should avoid adding an implicit-def to ORRXri, because
+# the register class will already be gpr64sp.
+---
+name: test2
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack:    true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test2
+    ; CHECK: renamable $x8 = IMPLICIT_DEF
+    ; CHECK-NEXT: renamable $x9 = ORRXri renamable $x8, 8128
+    ; CHECK-NEXT: $x2 = ORRXri renamable $x8, 8128
+    ; CHECK-NEXT: RET_ReallyLR implicit killed renamable $x8, implicit killed renamable $x9
+    %0:gpr64 = IMPLICIT_DEF
+    %1:gpr64sp = ORRXri %0, 8128
+    %3:gpr64 = SUBREG_TO_REG 0, %1.sub_32, %subreg.sub_32
+    %2:gpr64all = COPY killed %1
+    $x2 = COPY killed %2
+    RET_ReallyLR implicit %0, implicit %3
+...
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
index 08fc47d9480ce..eb6242ce9940d 100644
--- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
@@ -7,9 +7,18 @@
 # CHECK-DBG: ********** JOINING INTERVALS ***********
 # CHECK-DBG: ********** INTERVALS **********
 # CHECK-DBG: %0 [16r,32r:0) 0@16r  weight:0.000000e+00
-# CHECK-DBG: %3 [48r,112r:0) 0@48r  L0000000000000040 [48r,112r:0) 0@48r  weight:0.000000e+00
-# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r  L0000000000000080 [112e,112d:0) 0@112e  L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r  weight:0.000000e+00
+# CHECK-DBG: %3 [48r,112r:0) 0@48r  L0000000000000080 [48r,112r:0) 0@48r  L0000000000000040 [48r,112r:0) 0@48r  weight:0.000000e+00
+# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r  L0000000000000080 [80r,112e:1)[112e,112d:0) 0@112e 1@80r  L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r  weight:0.000000e+00
 # CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0@112r 1@32r  weight:0.000000e+00
+# CHECK-DBG: ********** MACHINEINSTRS **********
+# CHECK-DBG: 0B      bb.0.entry:
+# CHECK-DBG: 16B       %0:gpr64sp = ADDXri %stack.0, 0, 0
+# CHECK-DBG: 32B       %5:gpr64common = nuw ADDXri %0:gpr64sp, 64, 0
+# CHECK-DBG: 48B       undef %3.sub_32:gpr64 = MOVi32imm 64, implicit-def %3:gpr64
+# CHECK-DBG: 80B       undef %4.sub_32:gpr64 = MOVi32imm 64, implicit-def %4:gpr64
+# CHECK-DBG: 112B      dead %5:gpr64common, dead early-clobber %4:gpr64 = MOPSMemorySetPseudo %5:gpr64common(tied-def 0), %4:gpr64(tied-def 1), %3:gpr64, implicit-def dead $nzcv
+# CHECK-DBG: 128B      RET_ReallyLR
+
 ---
 name:            test
 tracksRegLiveness: true
@@ -43,9 +52,44 @@ body:             |
 # CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0@48B-phi 1@320r 2@32r
 # CHECK-DBG-SAME: weight:0.000000e+00
 # CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi
-# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0@288r 1@x 2@x 3@304B-phi
+# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@x 3@304B-phi
 # CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi
 # CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: ********** MACHINEINSTRS **********
+# CHECK-DBG: 0B      bb.0:
+# CHECK-DBG:           successors: %bb.1(0x80000000); %bb.1(100.00%)
+# CHECK-DBG: 32B       %1:gpr64 = IMPLICIT_DEF
+# CHECK-DBG: 48B     bb.1:
+# CHECK-DBG:         ; predecessors: %bb.0, %bb.7
+# CHECK-DBG:           successors: %bb.2(0x80000000); %bb.2(100.00%)
+# CHECK-DBG: 64B     bb.2:
+# CHECK-DBG:         ; predecessors: %bb.1
+# CHECK-DBG:           successors: %bb.3(0x80000000); %bb.3(100.00%)
+# CHECK-DBG: 80B       undef %3.sub_32:gpr64 = MOVi32imm 1
+# CHECK-DBG: 96B     bb.3:
+# CHECK-DBG:         ; predecessors: %bb.2
+# CHECK-DBG:           successors: %bb.7(0x40000000), %bb.4(0x40000000); %bb.7(50.00%), %bb.4(50.00%)
+# CHECK-DBG: 112B      $nzcv = IMPLICIT_DEF
+# CHECK-DBG: 144B      Bcc 1, %bb.7, implicit killed $nzcv
+# CHECK-DBG: 160B    bb.4:
+# CHECK-DBG:         ; predecessors: %bb.3
+# CHECK-DBG:           successors: %bb.6(0x40000000), %bb.5(0x40000000); %bb.6(50.00%), %bb.5(50.00%)
+# CHECK-DBG: 176B      $nzcv = IMPLICIT_DEF
+# CHECK-DBG: 192B      Bcc 1, %bb.6, implicit killed $nzcv
+# CHECK-DBG: 208B    bb.5:
+# CHECK-DBG:         ; predecessors: %bb.4
+# CHECK-DBG:           successors: %bb.7(0x80000000); %bb.7(100.00%)
+# CHECK-DBG: 240B      undef %3.sub_32:gpr64 = MOVi32imm 1, implicit-def %3:gpr64
+# CHECK-DBG: 256B      B %bb.7
+# CHECK-DBG: 272B    bb.6:
+# CHECK-DBG:         ; predecessors: %bb.4
+# CHECK-DBG:           successors: %bb.7(0x80000000); %bb.7(100.00%)
+# CHECK-DBG: 288B      %3:gpr64 = COPY $xzr
+# CHECK-DBG: 304B    bb.7:
+# CHECK-DBG:         ; predecessors: %bb.3, %bb.5, %bb.6
+# CHECK-DBG:           successors: %bb.1(0x80000000); %bb.1(100.00%)
+# CHECK-DBG: 320B      %1:gpr64 = ADDXrs %1:gpr64, %3:gpr64, 1
+# CHECK-DBG: 352B      B %bb.1
 ---
 name:              reproducer
 tracksRegLiveness: true
@@ -92,6 +136,42 @@ body:             |
 # CHECK-DBG-SAME: L0000000000000080 [224r,256B:1)[272r,288B:0)[288B,304r:3) 0@272r 1@224r 2@x 3@288B-phi
 # CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[224r,256B:1)[272r,288B:0)[288B,304r:3) 0@272r 1@224r 2@80r 3@288B-phi
 # CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: ********** MACHINEINSTRS **********
+# CHECK-DBG: 0B      bb.0:
+# CHECK-DBG:           successors: %bb.1(0x80000000); %bb.1(100.00%)
+# CHECK-DBG: 32B       %1:gpr64 = IMPLICIT_DEF
+# CHECK-DBG: 48B     bb.1:
+# CHECK-DBG:         ; predecessors: %bb.0, %bb.7
+# CHECK-DBG:           successors: %bb.2(0x80000000); %bb.2(100.00%)
+# CHECK-DBG: 64B     bb.2:
+# CHECK-DBG:         ; predecessors: %bb.1
+# CHECK-DBG:           successors: %bb.3(0x80000000); %bb.3(100.00%)
+# CHECK-DBG: 80B       undef %3.sub_32:gpr64 = MOVi32imm 1
+# CHECK-DBG: 96B     bb.3:
+# CHECK-DBG:         ; predecessors: %bb.2
+# CHECK-DBG:           successors: %bb.7(0x40000000), %bb.4(0x40000000); %bb.7(50.00%), %bb.4(50.00%)
+# CHECK-DBG: 112B      $nzcv = IMPLICIT_DEF
+# CHECK-DBG: 144B      Bcc 1, %bb.7, implicit killed $nzcv
+# CHECK-DBG: 160B    bb.4:
+# CHECK-DBG:         ; predecessors: %bb.3
+# CHECK-DBG:           successors: %bb.6(0x40000000), %bb.5(0x40000000); %bb.6(50.00%), %bb.5(50.00%)
+# CHECK-DBG: 176B      $nzcv = IMPLICIT_DEF
+# CHECK-DBG: 192B      Bcc 1, %bb.6, implicit killed $nzcv
+# CHECK-DBG: 208B    bb.5:
+# CHECK-DBG:         ; predecessors: %bb.4
+# CHECK-DBG:           successors: %bb.7(0x80000000); %bb.7(100.00%)
+# CHECK-DBG: 224B      %3:gpr64 = IMPLICIT_DEF
+# CHECK-DBG: 240B      B %bb.7
+# CHECK-DBG: 256B    bb.6:
+# CHECK-DBG:         ; predecessors: %bb.4
+# CHECK-DBG:           successors: %bb.7(0x80000000); %bb.7(100.00%)
+# CHECK-DBG: 272B      %3:gpr64 = COPY $xzr
+# CHECK-DBG: 288B    bb.7:
+# CHECK-DBG:         ; predecessors: %bb.3, %bb.5, %bb.6
+# CHECK-DBG:           successors: %bb.1(0x80000000); %bb.1(100.00%)
+# CHECK-DBG: 304B      %1:gpr64 = ADDXrs %1:gpr64, %3:gpr64, 1
+# CHECK-DBG: 336B      B %bb.1
+
 ---
 name:              reproducer2
 tracksRegLiveness: true
@@ -127,3 +207,78 @@ body:             |
     B %bb.1
 
 ...
+# CHECK-DBG: ********** REGISTER COALESCER **********
+# CHECK-DBG: ********** Function: reproducer3
+# CHECK-DBG: ********** JOINING INTERVALS ***********
+# CHECK-DBG: ********** INTERVALS **********
+# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0@0B-phi 1@320r
+# CHECK-DBG: W1 [0B,16r:0) 0@0B-phi
+# CHECK-DBG: %0 [16r,64r:0) 0@16r  weight:0.000000e+00
+# CHECK-DBG: %1 [32r,128r:0) 0@32r  weight:0.000000e+00
+# CHECK-DBG: %2 [48r,64r:0) 0@48r  weight:0.000000e+00
+# CHECK-DBG: %3 [64r,80r:0) 0@64r  weight:0.000000e+00
+# CHECK-DBG: %4 [80r,176r:0) 0@80r  weight:0.000000e+00
+# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
+# CHECK-DBG-SAME: L0000000000000080 [128r,256r:0)[304B,320r:0) 0@128r
+# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
+# CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0@176r 1@96r  weight:0.000000e+00
+# CHECK-DBG: %9 [256r,272r:0) 0@256r  weight:0.000000e+00
+# CHECK-DBG: ********** MACHINEINSTRS **********
+# CHECK-DBG: 0B      bb.0:
+# CHECK-DBG:           successors: %bb.2(0x40000000), %bb.1(0x40000000); %bb.2(50.00%), %bb.1(50.00%)
+# CHECK-DBG:           liveins: $w0, $w1
+# CHECK-DBG: 16B       %0:gpr32 = COPY $w1
+# CHECK-DBG: 32B       %1:gpr32 = COPY $w0
+# CHECK-DBG: 48B       %2:gpr32 = UBFMWri %1:gpr32, 31, 30
+# CHECK-DBG: 64B       %3:gpr32 = SUBWrs %2:gpr32, %0:gpr32, 1
+# CHECK-DBG: 80B       %4:gpr32 = UBFMWri %3:gpr32, 1, 31
+# CHECK-DBG: 96B       %8:gpr32common = MOVi32imm 1
+# CHECK-DBG: 112B      undef %7.sub_32:gpr64 = MOVi32imm 1
+# CHECK-DBG: 128B      undef %7.sub_32:gpr64 = BFMWri %7.sub_32:gpr64(tied-def 0), %1:gpr32, 31, 30, implicit-def %7:gpr64
+# CHECK-DBG: 176B      %8:gpr32common = BFMWri %8:gpr32common(tied-def 0), %4:gpr32, 30, 29
+# CHECK-DBG: 192B      dead $wzr = SUBSWri %8:gpr32common, 0, 0, implicit-def $nzcv
+# CHECK-DBG: 208B      Bcc 2, %bb.2, implicit killed $nzcv
+# CHECK-DBG: 224B      B %bb.1
+# CHECK-DBG: 240B    bb.1:
+# CHECK-DBG:         ; predecessors: %bb.0
+# CHECK-DBG: 256B      %9:gpr64common = UBFMXri %7:gpr64, 62, 61
+# CHECK-DBG: 272B      dead $xzr = LDRXui %9:gpr64common, 0
+# CHECK-DBG: 288B      RET_ReallyLR
+# CHECK-DBG: 304B    bb.2:
+# CHECK-DBG:         ; predecessors: %bb.0
+# CHECK-DBG: 320B      $x0 = COPY %7:gpr64
+# CHECK-DBG: 336B      RET_ReallyLR implicit $x0
+
+---
+name:            reproducer3
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+
+    %0:gpr32 = COPY killed $w1
+    %1:gpr32 = COPY killed $w0
+    %3:gpr32 = UBFMWri %1, 31, 30
+    %4:gpr32 = SUBWrs killed %3, killed %0, 1
+    %5:gpr32 = UBFMWri killed %4, 1, 31
+    %6:gpr32 = MOVi32imm 1
+    %7:gpr32 = COPY %6
+    %7:gpr32 = BFMWri %7, killed %1, 31, 30
+    %8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32
+    %9:gpr32common = COPY killed %6
+    %9:gpr32common = BFMWri %9, killed %5, 30, 29
+    dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv
+    Bcc 2, %bb.2, implicit killed $nzcv
+    B %bb.1
+
+  bb.1:
+    %10:gpr64common = UBFMXri killed %8, 62, 61
+    dead $xzr = LDRXui killed %10, 0
+    RET_ReallyLR
+
+  bb.2:
+    $x0 = COPY killed %8
+    RET_ReallyLR implicit killed $x0
+
+...
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index d09ef0e2c6ac0..c12dbe488263f 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -1532,11 +1532,11 @@ define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float
 ; CHECK-NEXT:    # kill: def $f7 killed $f7 def $vr7
 ; CHECK-NEXT:    # kill: def $f6 killed $f6 def $vr6
 ; CHECK-NEXT:    # kill: def $f5 killed $f5 def $vr5
-; CHECK-NEXT:    # kill: def $f4 killed $f4 def $xr4
+; CHECK-NEXT:    # kill: def $f4 killed $f4 def $vr4 def $xr4
 ; CHECK-NEXT:    # kill: def $f3 killed $f3 def $vr3
 ; CHECK-NEXT:    # kill: def $f2 killed $f2 def $vr2
 ; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vr1
-; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0 def $xr0
 ; CHECK-NEXT:    vextrins.w $vr4, $vr5, 16
 ; CHECK-NEXT:    vextrins.w $vr4, $vr6, 32
 ; CHECK-NEXT:    vextrins.w $vr4, $vr7, 48
@@ -1619,7 +1619,7 @@ define void @buildvector_v8f32_subseq_2(ptr %dst, float %a0, float %a1, float %a
 ; CHECK-NEXT:    # kill: def $f3 killed $f3 def $vr3
 ; CHECK-NEXT:    # kill: def $f2 killed $f2 def $vr2
 ; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vr1
-; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0 def $xr0
 ; CHECK-NEXT:    vextrins.w $vr0, $vr1, 16
 ; CHECK-NEXT:    vextrins.w $vr0, $vr2, 32
 ; CHECK-NEXT:    vextrins.w $vr0, $vr3, 48
@@ -1643,7 +1643,7 @@ define void @buildvector_v8f32_subseq_4(ptr %dst, float %a0, float %a1) nounwind
 ; CHECK-LABEL: buildvector_v8f32_subseq_4:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vr1
-; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0 def $xr0
 ; CHECK-NEXT:    vextrins.w $vr0, $vr1, 16
 ; CHECK-NEXT:    xvreplve0.d $xr0, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
@@ -1665,9 +1665,9 @@ define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, dou
 ; CHECK-LABEL: buildvector_v4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $f3_64 killed $f3_64 def $vr3
-; CHECK-NEXT:    # kill: def $f2_64 killed $f2_64 def $xr2
+; CHECK-NEXT:    # kill: def $f2_64 killed $f2_64 def $vr2 def $xr2
 ; CHECK-NEXT:    # kill: def $f1_64 killed $f1_64 def $vr1
-; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
 ; CHECK-NEXT:    vextrins.d $vr2, $vr3, 16
 ; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2
@@ -1722,7 +1722,7 @@ define void @buildvector_v4f64_subseq_2(ptr %dst, double %a0, double %a1) nounwi
 ; CHECK-LABEL: buildvector_v4f64_subseq_2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $f1_64 killed $f1_64 def $vr1
-; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
 ; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16
 ; CHECK-NEXT:    xvreplve0.q $xr0, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
index 45b25013c9173..d01848e8547b6 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
@@ -22,7 +22,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
 ; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl __powisf2
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 def $xr0
 ; LA32-NEXT:    vld $vr1, $sp, 48 # 16-byte Folded Reload
 ; LA32-NEXT:    vextrins.w $vr0, $vr1, 16
 ; LA32-NEXT:    xvst $xr0, $sp, 48 # 32-byte Folded Spill
@@ -56,7 +56,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
 ; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl __powisf2
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 def $xr0
 ; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
 ; LA32-NEXT:    vextrins.w $vr0, $vr1, 16
 ; LA32-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill
@@ -105,7 +105,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(__powisf2)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 def $xr0
 ; LA64-NEXT:    vld $vr1, $sp, 48 # 16-byte Folded Reload
 ; LA64-NEXT:    vextrins.w $vr0, $vr1, 16
 ; LA64-NEXT:    xvst $xr0, $sp, 48 # 32-byte Folded Spill
@@ -143,7 +143,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(__powisf2)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 def $xr0
 ; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
 ; LA64-NEXT:    vextrins.w $vr0, $vr1, 16
 ; LA64-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill
@@ -198,7 +198,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
 ; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl __powidf2
-; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
 ; LA32-NEXT:    vld $vr1, $sp, 32 # 16-byte Folded Reload
 ; LA32-NEXT:    vextrins.d $vr0, $vr1, 16
 ; LA32-NEXT:    xvst $xr0, $sp, 32 # 32-byte Folded Spill
@@ -214,7 +214,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
 ; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl __powidf2
-; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
 ; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
 ; LA32-NEXT:    vextrins.d $vr0, $vr1, 16
 ; LA32-NEXT:    xvld $xr1, $sp, 32 # 32-byte Folded Reload
@@ -244,7 +244,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(__powidf2)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
 ; LA64-NEXT:    vld $vr1, $sp, 32 # 16-byte Folded Reload
 ; LA64-NEXT:    vextrins.d $vr0, $vr1, 16
 ; LA64-NEXT:    xvst $xr0, $sp, 32 # 32-byte Folded Spill
@@ -262,7 +262,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(__powidf2)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
 ; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
 ; LA64-NEXT:    vextrins.d $vr0, $vr1, 16
 ; LA64-NEXT:    xvld $xr1, $sp, 32 # 32-byte Folded Reload
diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
index bba269279937a..be5d42bdfb975 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
@@ -49,7 +49,7 @@ define <4 x i64> @scalar_to_4xi64(i64 %val) {
 define <8 x float> @scalar_to_8xf32(float %val) {
 ; CHECK-LABEL: scalar_to_8xf32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0 def $xr0
 ; CHECK-NEXT:    ret
   %ret = insertelement <8 x float> poison, float %val, i32 0
   ret <8 x float> %ret
@@ -58,7 +58,7 @@ define <8 x float> @scalar_to_8xf32(float %val) {
 define <4 x double> @scalar_to_4xf64(double %val) {
 ; CHECK-LABEL: scalar_to_4xf64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
 ; CHECK-NEXT:    ret
   %ret = insertelement <4 x double> poison, double %val, i32 0
   ret <4 x double> %ret
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
index afc7a39e18dc8..aae23265710ce 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -750,21 +750,25 @@ entry:
 define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
 ; CHECK-64-LABEL: testDoubleImm1:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-64-NEXT:    xxpermdi 34, 1, 34, 1
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: testDoubleImm1:
 ; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-32-NEXT:    xxpermdi 34, 1, 34, 1
 ; CHECK-32-NEXT:    blr
 ;
 ; CHECK-64-P10-LABEL: testDoubleImm1:
 ; CHECK-64-P10:       # %bb.0: # %entry
+; CHECK-64-P10-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-64-P10-NEXT:    xxpermdi 34, 1, 34, 1
 ; CHECK-64-P10-NEXT:    blr
 ;
 ; CHECK-32-P10-LABEL: testDoubleImm1:
 ; CHECK-32-P10:       # %bb.0: # %entry
+; CHECK-32-P10-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-32-P10-NEXT:    xxpermdi 34, 1, 34, 1
 ; CHECK-32-P10-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index fb55511162a7e..4f965780ab95e 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1754,7 +1754,11 @@ entry:
 define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ; P9BE-LABEL: fromRegsConvdtoi:
 ; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xvcvdpsxws v2, vs0
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
 ; P9BE-NEXT:    xvcvdpsxws v3, vs0
@@ -1763,7 +1767,11 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ;
 ; P9LE-LABEL: fromRegsConvdtoi:
 ; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9LE-NEXT:    xvcvdpsxws v2, vs0
 ; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
 ; P9LE-NEXT:    xvcvdpsxws v3, vs0
@@ -1772,6 +1780,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ;
 ; P8BE-LABEL: fromRegsConvdtoi:
 ; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpsxws v2, vs0
@@ -1781,6 +1793,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ;
 ; P8LE-LABEL: fromRegsConvdtoi:
 ; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpsxws v2, vs0
@@ -3240,7 +3256,11 @@ entry:
 define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) {
 ; P9BE-LABEL: fromRegsConvdtoui:
 ; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xvcvdpuxws v2, vs0
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
 ; P9BE-NEXT:    xvcvdpuxws v3, vs0
@@ -3249,7 +3269,11 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
 ;
 ; P9LE-LABEL: fromRegsConvdtoui:
 ; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9LE-NEXT:    xvcvdpuxws v2, vs0
 ; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
 ; P9LE-NEXT:    xvcvdpuxws v3, vs0
@@ -3258,6 +3282,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
 ;
 ; P8BE-LABEL: fromRegsConvdtoui:
 ; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpuxws v2, vs0
@@ -3267,6 +3295,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
 ;
 ; P8LE-LABEL: fromRegsConvdtoui:
 ; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpuxws v2, vs0
@@ -4540,24 +4572,32 @@ entry:
 define <2 x i64> @fromRegsConvdtoll(double %a, double %b) {
 ; P9BE-LABEL: fromRegsConvdtoll:
 ; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P9BE-NEXT:    xvcvdpsxds v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromRegsConvdtoll:
 ; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P9LE-NEXT:    xvcvdpsxds v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromRegsConvdtoll:
 ; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P8BE-NEXT:    xvcvdpsxds v2, vs0
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromRegsConvdtoll:
 ; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P8LE-NEXT:    xvcvdpsxds v2, vs0
 ; P8LE-NEXT:    blr
@@ -5694,24 +5734,32 @@ entry:
 define <2 x i64> @fromRegsConvdtoull(double %a, double %b) {
 ; P9BE-LABEL: fromRegsConvdtoull:
 ; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P9BE-NEXT:    xvcvdpuxds v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromRegsConvdtoull:
 ; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P9LE-NEXT:    xvcvdpuxds v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromRegsConvdtoull:
 ; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P8BE-NEXT:    xvcvdpuxds v2, vs0
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromRegsConvdtoull:
 ; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P8LE-NEXT:    xvcvdpuxds v2, vs0
 ; P8LE-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 7f6fdc7f88cd1..b40fbc3e16873 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -562,6 +562,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P8-NEXT:    bl dummy
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxlxor f0, f0, f0
+; CHECK-P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
@@ -576,6 +577,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P9-NEXT:    bl dummy
 ; CHECK-P9-NEXT:    nop
 ; CHECK-P9-NEXT:    xxlxor f0, f0, f0
+; CHECK-P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-P9-NEXT:    stxv vs0, 0(r30)
 ;
@@ -589,6 +591,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P9-BE-NEXT:    bl dummy
 ; CHECK-P9-BE-NEXT:    nop
 ; CHECK-P9-BE-NEXT:    xxlxor f0, f0, f0
+; CHECK-P9-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P9-BE-NEXT:    xxmrghd vs0, vs0, vs1
 ; CHECK-P9-BE-NEXT:    stxv vs0, 0(r30)
 ;
@@ -615,6 +618,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P7-NEXT:    bl dummy
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    xxlxor f0, f0, f0
+; CHECK-P7-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P7-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-P7-NEXT:    xxswapd vs0, vs0
 ; CHECK-P7-NEXT:    stxvd2x vs0, 0, r30
@@ -629,6 +633,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; P8-AIX-64-NEXT:    bl .dummy[PR]
 ; P8-AIX-64-NEXT:    nop
 ; P8-AIX-64-NEXT:    xxlxor f0, f0, f0
+; P8-AIX-64-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-AIX-64-NEXT:    xxmrghd vs0, vs0, vs1
 ; P8-AIX-64-NEXT:    stxvd2x vs0, 0, r31
 ;
@@ -642,6 +647,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; P8-AIX-32-NEXT:    bl .dummy[PR]
 ; P8-AIX-32-NEXT:    nop
 ; P8-AIX-32-NEXT:    xxlxor f0, f0, f0
+; P8-AIX-32-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-AIX-32-NEXT:    xxmrghd vs0, vs0, vs1
 ; P8-AIX-32-NEXT:    stxvd2x vs0, 0, r31
 test_entry:
diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
index 04af0947c7a33..a72abf7007e8d 100644
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -6,6 +6,7 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
 ; CHECK-LABEL: fneg_fdiv_splat:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxspltd 0, 1, 0
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
 ; CHECK-NEXT:    xvredp 1, 0
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
index eac4fb6f98bf7..4519cf4101f42 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -229,6 +229,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    bl nearbyintf
 ; P8-NEXT:    nop
+; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd vs0, vs1, v30
 ; P8-NEXT:    xscvspdpn f1, v31
 ; P8-NEXT:    xvcvdpsp v29, vs0
@@ -239,6 +240,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    bl nearbyintf
 ; P8-NEXT:    nop
+; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd vs0, v30, vs1
 ; P8-NEXT:    li r3, 160
 ; P8-NEXT:    xvcvdpsp v2, vs0
@@ -276,6 +278,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P9-NEXT:    xscvspdpn f1, vs0
 ; P9-NEXT:    bl nearbyintf
 ; P9-NEXT:    nop
+; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9-NEXT:    xxmrghd vs0, vs1, v30
 ; P9-NEXT:    xscvspdpn f1, v31
 ; P9-NEXT:    xvcvdpsp v29, vs0
@@ -286,6 +289,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P9-NEXT:    xscvspdpn f1, vs0
 ; P9-NEXT:    bl nearbyintf
 ; P9-NEXT:    nop
+; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9-NEXT:    xxmrghd vs0, v30, vs1
 ; P9-NEXT:    lxv v31, 64(r1) # 16-byte Folded Reload
 ; P9-NEXT:    lxv v30, 48(r1) # 16-byte Folded Reload
@@ -326,6 +330,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric
 ; P8-NEXT:    bl nearbyint
 ; P8-NEXT:    nop
 ; P8-NEXT:    li r3, 144
+; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd v2, v30, vs1
 ; P8-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 128
@@ -354,6 +359,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric
 ; P9-NEXT:    xxswapd vs1, v31
 ; P9-NEXT:    bl nearbyint
 ; P9-NEXT:    nop
+; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9-NEXT:    xxmrghd v2, v30, vs1
 ; P9-NEXT:    lxv v31, 48(r1) # 16-byte Folded Reload
 ; P9-NEXT:    lxv v30, 32(r1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/frem.ll b/llvm/test/CodeGen/PowerPC/frem.ll
index 19b4b1c9cdf95..21cb206ac43bb 100644
--- a/llvm/test/CodeGen/PowerPC/frem.ll
+++ b/llvm/test/CodeGen/PowerPC/frem.ll
@@ -70,6 +70,7 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    xscvspdpn 2, 0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd 0, 1, 61
 ; CHECK-NEXT:    xscvspdpn 1, 62
 ; CHECK-NEXT:    xscvspdpn 2, 63
@@ -83,6 +84,7 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    xscvspdpn 2, 0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd 0, 61, 1
 ; CHECK-NEXT:    lxv 63, 80(1) # 16-byte Folded Reload
 ; CHECK-NEXT:    lxv 62, 64(1) # 16-byte Folded Reload
@@ -124,6 +126,7 @@ define <2 x double> @frem2x64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-NEXT:    xxswapd 2, 63
 ; CHECK-NEXT:    bl fmod
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd 34, 61, 1
 ; CHECK-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
 ; CHECK-NEXT:    lxv 62, 48(1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
index 238e200bfc782..3ae0b02f79e27 100644
--- a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
@@ -41,39 +41,47 @@ define void @test(ptr %p1, ptr %p2) nounwind {
 ; CHECK-NEXT:    xxswapd 61, 63
 ; CHECK-NEXT:    bl roundeven
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxswapd 56, 1
 ; CHECK-NEXT:    xxlor 1, 59, 59
 ; CHECK-NEXT:    bl roundeven
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxswapd 0, 1
 ; CHECK-NEXT:    xxlor 1, 60, 60
 ; CHECK-NEXT:    xxmrgld 59, 0, 56
 ; CHECK-NEXT:    bl roundeven
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxswapd 60, 1
 ; CHECK-NEXT:    xxlor 1, 62, 62
 ; CHECK-NEXT:    bl roundeven
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxswapd 0, 1
 ; CHECK-NEXT:    xxlor 1, 61, 61
 ; CHECK-NEXT:    xxmrgld 62, 0, 60
 ; CHECK-NEXT:    bl roundeven
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxswapd 61, 1
 ; CHECK-NEXT:    xxlor 1, 63, 63
 ; CHECK-NEXT:    bl roundeven
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxswapd 0, 1
 ; CHECK-NEXT:    xxlor 1, 57, 57
 ; CHECK-NEXT:    xxmrgld 63, 0, 61
 ; CHECK-NEXT:    bl roundeven
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxswapd 61, 1
 ; CHECK-NEXT:    xxlor 1, 58, 58
 ; CHECK-NEXT:    bl roundeven
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    li 3, 160
 ; CHECK-NEXT:    stxvd2x 63, 30, 29
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxswapd 0, 1
 ; CHECK-NEXT:    stxvd2x 62, 30, 28
 ; CHECK-NEXT:    stxvd2x 59, 30, 27
diff --git a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll
index 903ea691ae6ba..8b5b7962da33f 100644
--- a/llvm/test/CodeGen/PowerPC/half.ll
+++ b/llvm/test/CodeGen/PowerPC/half.ll
@@ -1365,6 +1365,7 @@ define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    li r3, 80
+; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd vs0, vs61, vs1
 ; P8-NEXT:    xxmrghd vs1, vs63, vs62
 ; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 8d7253b5ce8e3..23748bca0b7b2 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -107,6 +107,7 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) nounwind {
 ; CHECK-NEXT:    extsw r4, r3
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd vs0, vs1, v29
 ; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    vextuwrx r3, r3, v31
@@ -123,6 +124,7 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) nounwind {
 ; CHECK-NEXT:    xscvspdpn f1, vs0
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd vs0, vs1, v29
 ; CHECK-NEXT:    lxv v31, 80(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    lxv v30, 64(r1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index 1b137c786cc91..203b3bd15490a 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -294,6 +294,7 @@ define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
 ; CHECK-NEXT:    addi r4, r1, 40
 ; CHECK-NEXT:    bl modf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd v2, v30, vs1
 ; CHECK-NEXT:    lfd f0, 32(r1)
 ; CHECK-NEXT:    lfd f1, 40(r1)
diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
index b006c78604648..0364166a1b29e 100644
--- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -928,21 +928,25 @@ entry:
 define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
 ; CHECK-LABEL: testDoubleImm1:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd v2, v2, vs1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testDoubleImm1:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-BE-NEXT:    xxpermdi v2, vs1, v2, 1
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testDoubleImm1:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P9-NEXT:    xxpermdi v2, vs1, v2, 1
 ; CHECK-P9-NEXT:    blr
 ;
 ; AIX-P8-LABEL: testDoubleImm1:
 ; AIX-P8:       # %bb.0: # %entry
+; AIX-P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; AIX-P8-NEXT:    xxpermdi v2, vs1, v2, 1
 ; AIX-P8-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 08ca1d153248e..e6307aa2906da 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -107,6 +107,10 @@ entry:
 define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fdiv_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    xsdivdp 3, 3, 6
@@ -116,6 +120,10 @@ define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double>
 ;
 ; PC64LE9-LABEL: constrained_vector_fdiv_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    xsdivdp 3, 3, 6
@@ -209,6 +217,7 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -239,6 +248,7 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE9-NEXT:    xxswapd 2, 63
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 48(1) # 16-byte Folded Reload
@@ -390,6 +400,7 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    fmr 2, 30
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 29
 ; PC64LE-NEXT:    fmr 2, 31
@@ -431,6 +442,7 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    fmr 2, 30
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 29
 ; PC64LE9-NEXT:    fmr 2, 31
@@ -486,6 +498,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    xxswapd 2, 62
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE-NEXT:    xxlor 1, 61, 61
 ; PC64LE-NEXT:    xxlor 2, 63, 63
@@ -498,6 +511,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 112
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 96
@@ -536,6 +550,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-NEXT:    xxswapd 2, 62
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 61, 61
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
@@ -546,6 +561,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-NEXT:    xxswapd 2, 63
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 96(1) # 16-byte Folded Reload
@@ -670,6 +686,10 @@ entry:
 define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fmul_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    xsmuldp 3, 3, 6
@@ -679,6 +699,10 @@ define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double>
 ;
 ; PC64LE9-LABEL: constrained_vector_fmul_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    xsmuldp 3, 3, 6
@@ -820,6 +844,10 @@ entry:
 define <3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fadd_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    xsadddp 3, 3, 6
@@ -829,6 +857,10 @@ define <3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double>
 ;
 ; PC64LE9-LABEL: constrained_vector_fadd_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    xsadddp 3, 3, 6
@@ -970,6 +1002,10 @@ entry:
 define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fsub_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    xssubdp 3, 3, 6
@@ -979,6 +1015,10 @@ define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double>
 ;
 ; PC64LE9-LABEL: constrained_vector_fsub_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    xssubdp 3, 3, 6
@@ -1105,6 +1145,8 @@ entry:
 define <3 x double> @constrained_vector_sqrt_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sqrt_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xssqrtdp 3, 3
 ; PC64LE-NEXT:    xvsqrtdp 2, 0
@@ -1113,6 +1155,8 @@ define <3 x double> @constrained_vector_sqrt_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_sqrt_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xssqrtdp 3, 3
 ; PC64LE9-NEXT:    xvsqrtdp 2, 0
@@ -1203,6 +1247,7 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -1233,6 +1278,7 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE9-NEXT:    xxswapd 2, 63
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 48(1) # 16-byte Folded Reload
@@ -1384,6 +1430,7 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    fmr 2, 30
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 29
 ; PC64LE-NEXT:    fmr 2, 31
@@ -1425,6 +1472,7 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    fmr 2, 30
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 29
 ; PC64LE9-NEXT:    fmr 2, 31
@@ -1480,6 +1528,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    xxswapd 2, 62
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE-NEXT:    xxlor 1, 61, 61
 ; PC64LE-NEXT:    xxlor 2, 63, 63
@@ -1492,6 +1541,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 112
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 96
@@ -1530,6 +1580,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-NEXT:    xxswapd 2, 62
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 61, 61
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
@@ -1540,6 +1591,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-NEXT:    xxswapd 2, 63
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 96(1) # 16-byte Folded Reload
@@ -1618,6 +1670,7 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    ld 30, 80(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
@@ -1647,6 +1700,7 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -1790,6 +1844,7 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    mr 4, 30
@@ -1828,6 +1883,7 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    mr 4, 30
@@ -1878,6 +1934,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    mr 4, 30
@@ -1890,6 +1947,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    ld 30, 96(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
@@ -1923,6 +1981,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    mr 4, 30
@@ -1933,6 +1992,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -2003,6 +2063,7 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -2027,6 +2088,7 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -2149,6 +2211,7 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl sin
@@ -2181,6 +2244,7 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl sin
@@ -2224,6 +2288,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 1, 62
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl sin
@@ -2234,6 +2299,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -2262,6 +2328,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl sin
@@ -2270,6 +2337,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -2338,6 +2406,7 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -2362,6 +2431,7 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -2484,6 +2554,7 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl cos
@@ -2516,6 +2587,7 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl cos
@@ -2559,6 +2631,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 1, 62
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl cos
@@ -2569,6 +2642,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -2597,6 +2671,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl cos
@@ -2605,6 +2680,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -2673,6 +2749,7 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -2697,6 +2774,7 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -2819,6 +2897,7 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl exp
@@ -2851,6 +2930,7 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl exp
@@ -2894,6 +2974,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 1, 62
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl exp
@@ -2904,6 +2985,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -2932,6 +3014,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl exp
@@ -2940,6 +3023,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -3008,6 +3092,7 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -3032,6 +3117,7 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -3154,6 +3240,7 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl exp2
@@ -3186,6 +3273,7 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl exp2
@@ -3229,6 +3317,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 1, 62
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl exp2
@@ -3239,6 +3328,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -3267,6 +3357,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl exp2
@@ -3275,6 +3366,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -3343,6 +3435,7 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -3367,6 +3460,7 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -3489,6 +3583,7 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl log
@@ -3521,6 +3616,7 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl log
@@ -3564,6 +3660,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 1, 62
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl log
@@ -3574,6 +3671,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -3602,6 +3700,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl log
@@ -3610,6 +3709,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -3678,6 +3778,7 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -3702,6 +3803,7 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -3824,6 +3926,7 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl log10
@@ -3856,6 +3959,7 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl log10
@@ -3899,6 +4003,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 1, 62
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl log10
@@ -3909,6 +4014,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -3937,6 +4043,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl log10
@@ -3945,6 +4052,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -4013,6 +4121,7 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -4037,6 +4146,7 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -4159,6 +4269,7 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl log2
@@ -4191,6 +4302,7 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl log2
@@ -4234,6 +4346,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 1, 62
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl log2
@@ -4244,6 +4357,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -4272,6 +4386,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl log2
@@ -4280,6 +4395,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -4387,6 +4503,8 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
 define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_rint_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpic 3, 3
 ; PC64LE-NEXT:    xvrdpic 2, 0
@@ -4395,6 +4513,8 @@ define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_rint_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpic 3, 3
 ; PC64LE9-NEXT:    xvrdpic 2, 0
@@ -4479,6 +4599,7 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -4503,6 +4624,7 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -4625,6 +4747,7 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl nearbyint
@@ -4657,6 +4780,7 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl nearbyint
@@ -4700,6 +4824,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 1, 62
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl nearbyint
@@ -4710,6 +4835,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -4738,6 +4864,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl nearbyint
@@ -4746,6 +4873,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -4927,6 +5055,10 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    stdu 1, -64(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    std 0, 80(1)
@@ -4950,6 +5082,10 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    std 0, 64(1)
@@ -5159,6 +5295,10 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    stdu 1, -64(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    std 0, 80(1)
@@ -5182,6 +5322,10 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    std 0, 64(1)
@@ -6520,6 +6664,8 @@ entry:
 define <3 x double> @constrained_vector_ceil_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_ceil_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpip 3, 3
 ; PC64LE-NEXT:    xvrdpip 2, 0
@@ -6528,6 +6674,8 @@ define <3 x double> @constrained_vector_ceil_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpip 3, 3
 ; PC64LE9-NEXT:    xvrdpip 2, 0
@@ -6628,6 +6776,8 @@ entry:
 define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_floor_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpim 3, 3
 ; PC64LE-NEXT:    xvrdpim 2, 0
@@ -6636,6 +6786,8 @@ define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_floor_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpim 3, 3
 ; PC64LE9-NEXT:    xvrdpim 2, 0
@@ -6736,6 +6888,8 @@ entry:
 define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_round_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpi 3, 3
 ; PC64LE-NEXT:    xvrdpi 2, 0
@@ -6744,6 +6898,8 @@ define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_round_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpi 3, 3
 ; PC64LE9-NEXT:    xvrdpi 2, 0
@@ -6843,6 +6999,8 @@ entry:
 define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_trunc_v3f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpiz 3, 3
 ; PC64LE-NEXT:    xvrdpiz 2, 0
@@ -6851,6 +7009,8 @@ define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpiz 3, 3
 ; PC64LE9-NEXT:    xvrdpiz 2, 0
@@ -8049,6 +8209,7 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    bl tan
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -8073,6 +8234,7 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl tan
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -8195,6 +8357,7 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl tan
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl tan
@@ -8227,6 +8390,7 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl tan
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl tan
@@ -8270,6 +8434,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 1, 62
 ; PC64LE-NEXT:    bl tan
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl tan
@@ -8280,6 +8445,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -8308,6 +8474,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    bl tan
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl tan
@@ -8316,6 +8483,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    bl tan
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -8390,6 +8558,7 @@ define <2 x double> @constrained_vector_atan2_v2f64(<2 x double> %x, <2 x double
 ; PC64LE-NEXT:    bl atan2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -8420,6 +8589,7 @@ define <2 x double> @constrained_vector_atan2_v2f64(<2 x double> %x, <2 x double
 ; PC64LE9-NEXT:    xxswapd 2, 63
 ; PC64LE9-NEXT:    bl atan2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 48(1) # 16-byte Folded Reload
@@ -8571,6 +8741,7 @@ define <3 x double> @constrained_vector_atan2_v3f64(<3 x double> %x, <3 x double
 ; PC64LE-NEXT:    fmr 2, 30
 ; PC64LE-NEXT:    bl atan2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 29
 ; PC64LE-NEXT:    fmr 2, 31
@@ -8612,6 +8783,7 @@ define <3 x double> @constrained_vector_atan2_v3f64(<3 x double> %x, <3 x double
 ; PC64LE9-NEXT:    fmr 2, 30
 ; PC64LE9-NEXT:    bl atan2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 29
 ; PC64LE9-NEXT:    fmr 2, 31
@@ -8667,6 +8839,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
 ; PC64LE-NEXT:    xxswapd 2, 62
 ; PC64LE-NEXT:    bl atan2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE-NEXT:    xxlor 1, 61, 61
 ; PC64LE-NEXT:    xxlor 2, 63, 63
@@ -8679,6 +8852,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 112
 ; PC64LE-NEXT:    vmr 2, 30
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 96
@@ -8717,6 +8891,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
 ; PC64LE9-NEXT:    xxswapd 2, 62
 ; PC64LE9-NEXT:    bl atan2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 61, 61
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
@@ -8727,6 +8902,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
 ; PC64LE9-NEXT:    xxswapd 2, 63
 ; PC64LE9-NEXT:    bl atan2
 ; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 96(1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
new file mode 100644
index 0000000000000..ea7454faad218
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s
+
+%struct.wibble = type { %struct.wombat }
+%struct.wombat = type { %struct.ham, [3 x i8] }
+%struct.ham = type { %struct.zot }
+%struct.zot = type { %struct.blam }
+%struct.blam = type { %struct.ham.0 }
+%struct.ham.0 = type { %struct.bar }
+%struct.bar = type { %struct.bar.1 }
+%struct.bar.1 = type { %struct.baz, i8 }
+%struct.baz = type { %struct.snork }
+%struct.snork = type <{ %struct.spam, i8, [3 x i8] }>
+%struct.spam = type { %struct.snork.2, %struct.snork.2 }
+%struct.snork.2 = type { i32 }
+%struct.snork.3 = type { %struct.baz, i8, [3 x i8] }
+; Reduced reproducer; per the file name, presumably for the register coalescer breaking SUBREG_TO_REG liveness (TODO confirm against the fix).
+define void @foo(ptr %arg, ptr %arg1, i40 %arg2, ptr %arg3, i32 %arg4) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    movl %r8d, %r14d
+; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq %rsi, %r13
+; CHECK-NEXT:    movq %rdi, %r15
+; CHECK-NEXT:    incl %r14d
+; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    # implicit-def: $r12
+; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    jmp .LBB0_3
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_1: # %bb17
+; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    movq %r15, %r13
+; CHECK-NEXT:    xorl %r15d, %r15d
+; CHECK-NEXT:    testq %rbx, %rbx
+; CHECK-NEXT:    sete %r15b
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    callq _Znwm@PLT
+; CHECK-NEXT:    shll $4, %r15d
+; CHECK-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; CHECK-NEXT:    movq %r12, %rcx
+; CHECK-NEXT:    shrq $32, %rcx
+; CHECK-NEXT:    movb %cl, 12(%rax)
+; CHECK-NEXT:    movl %r12d, 8(%rax)
+; CHECK-NEXT:    movq %r15, %rbx
+; CHECK-NEXT:    movq %r13, %r15
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; CHECK-NEXT:    decl %r14d
+; CHECK-NEXT:    je .LBB0_8
+; CHECK-NEXT:  .LBB0_3: # %bb7
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    callq widget@PLT
+; CHECK-NEXT:    cmpb $-5, (%r13)
+; CHECK-NEXT:    jae .LBB0_5
+; CHECK-NEXT:  # %bb.4: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    movl %r12d, %r12d
+; CHECK-NEXT:    cmpq %r15, %rbx
+; CHECK-NEXT:    jbe .LBB0_1
+; CHECK-NEXT:    jmp .LBB0_7
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_5: # %bb12
+; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    movq 0, %rax
+; CHECK-NEXT:    movq 8, %rax
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; CHECK-NEXT:    cmpq %r15, %rbx
+; CHECK-NEXT:    jbe .LBB0_1
+; CHECK-NEXT:  .LBB0_7: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    decl %r14d
+; CHECK-NEXT:    jne .LBB0_3
+; CHECK-NEXT:  .LBB0_8: # %bb21
+; CHECK-NEXT:    cmpb $0, 12(%rax)
+; CHECK-NEXT:    jne .LBB0_10
+; CHECK-NEXT:  # %bb.9: # %bb26
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_10: # %bb25
+; CHECK-NEXT:    .cfi_def_cfa %rbp, 16
+; CHECK-NEXT:    movq %r15, %rdi
+; CHECK-NEXT:    callq pluto@PLT
+bb:
+  br label %bb7
+
+bb5:                                              ; preds = %bb17, %bb14
+  %phi = phi ptr [ %call19, %bb17 ], [ null, %bb14 ]
+  %phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb14 ]
+  %add = add i32 %phi9, 1
+  %icmp = icmp eq i32 %phi9, %arg4
+  br i1 %icmp, label %bb21, label %bb7
+
+bb7:                                              ; preds = %bb5, %bb
+  %phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ]
+  %phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+  %phi10 = phi i40 [ poison, %bb ], [ %phi15, %bb5 ] ; i40 value carried around the loop; poison on entry from %bb
+  %call = call ptr @widget()
+  %load = load i8, ptr %arg1, align 8
+  %icmp11 = icmp ult i8 %load, -5
+  %and = and i40 %phi10, 4294967295 ; 4294967295 == 0xFFFFFFFF: keeps only the low 32 bits of the i40
+  br i1 %icmp11, label %bb14, label %bb12
+
+bb12:                                             ; preds = %bb7
+  %load13 = load volatile { i64, i64 }, ptr null, align 4294967296 ; volatile load from null; the result is unused
+  br label %bb14
+
+bb14:                                             ; preds = %bb12, %bb7
+  %phi15 = phi i40 [ %and, %bb7 ], [ %arg2, %bb12 ]
+  %icmp16 = icmp ugt ptr %phi8, %arg
+  br i1 %icmp16, label %bb5, label %bb17
+
+bb17:                                             ; preds = %bb14
+  %icmp18 = icmp eq ptr %phi8, null
+  %zext = zext i1 %icmp18 to i64
+  %call19 = call ptr @_Znwm(i64 0)
+  %getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext
+  %getelementptr20 = getelementptr i8, ptr %call19, i64 8
+  store i40 %phi15, ptr %getelementptr20, align 4 ; i40 stored 8 bytes into the @_Znwm result
+  br label %bb5
+
+bb21:                                             ; preds = %bb5
+  %getelementptr22 = getelementptr %struct.snork.3, ptr %phi, i64 0, i32 1
+  %load23 = load i8, ptr %getelementptr22, align 4
+  %icmp24 = icmp eq i8 %load23, 0
+  br i1 %icmp24, label %bb26, label %bb25
+
+bb25:                                             ; preds = %bb21
+  call void @pluto(ptr %arg)
+  unreachable ; the IR marks the point after the @pluto call as unreachable
+
+bb26:                                             ; preds = %bb21
+  ret void
+}
+; Calls @foo with its two pointer arguments swapped and zero/null for the remaining arguments.
+define void @eggs(ptr %arg, ptr %arg1) {
+; CHECK-LABEL: eggs:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movq %rsi, %rdi
+; CHECK-NEXT:    movq %rax, %rsi
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %r8d, %r8d
+; CHECK-NEXT:    callq foo@PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+bb:
+  call void @foo(ptr %arg1, ptr %arg, i40 0, ptr null, i32 0) ; %arg1 is passed first, then %arg
+  ret void
+}
+
+declare ptr @widget()
+
+declare void @pluto(ptr)
+
+declare ptr @_Znwm(i64)
+
+attributes #0 = { noinline "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
index 8241a1757af52..0bc208dc709d7 100644
--- a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
+++ b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines
 ---
 name:  rematerialize_subreg_to_reg_added_impdef_1
 tracksRegLiveness: true
@@ -9,7 +9,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.1(0x2aaaaaab), %bb.2(0x55555555)
   ; CHECK-NEXT:   liveins: $edi
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+  ; CHECK-NEXT:   undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
   ; CHECK-NEXT:   JCC_1 %bb.2, 5, implicit killed undef $eflags
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
@@ -28,7 +28,7 @@ body:             |
   ; CHECK-NEXT:   JCC_1 %bb.5, 5, implicit killed undef $eflags
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4:
-  ; CHECK-NEXT:   dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al
+  ; CHECK-NEXT:   dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al, implicit-def $al
   ; CHECK-NEXT:   RET 0, killed undef $al
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.5:
diff --git a/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
new file mode 100644
index 0000000000000..2e6395f065e25
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -enable-subreg-liveness -verify-coalescing -o - %s | FileCheck %s
+# Exercises only the register coalescer, with subregister liveness and -verify-coalescing enabled; %a is defined by SUBREG_TO_REG in both bb.0 and bb.1.
+---
+name: requires_new_subrange_coalesce_subreg_to_reg
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: requires_new_subrange_coalesce_subreg_to_reg
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $eax
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef %a.sub_32bit:gr64_with_sub_8bit = COPY $eax
+  ; CHECK-NEXT:   %b:gr32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
+  ; CHECK-NEXT:   JCC_1 %bb.2, 4, implicit undef $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef %a.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+  ; CHECK-NEXT:   %c.sub_32bit:gr64 = COPY %a
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   %c.sub_32bit:gr64 = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
+  ; CHECK-NEXT:   RET 0, implicit %c
+  bb.0:
+    liveins: $eax
+    %init_eax:gr32 = COPY $eax
+    %a:gr64 = SUBREG_TO_REG 0, %init_eax, %subreg.sub_32bit
+    %b:gr32 = IMPLICIT_DEF
+    %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
+    JCC_1 %bb.2, 4, implicit undef $eflags
+
+  bb.1:
+    %imm0:gr32 = MOV32r0 implicit-def dead $eflags
+    %a = SUBREG_TO_REG 0, %imm0, %subreg.sub_32bit
+    %c.sub_32bit = COPY %a
+
+  bb.2:
+    %c.sub_32bit = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
+    RET 0, implicit %c
+
+...
diff --git a/llvm/test/CodeGen/X86/pr76416.ll b/llvm/test/CodeGen/X86/pr76416.ll
new file mode 100644
index 0000000000000..68e9ef9c87f6e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr76416.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;
+; Reproducer from https://github.com/llvm/llvm-project/issues/76416
+;
+
+@load_p = external global ptr, align 8
+@load_data = external global i8, align 1
+; Two loops share one stack-slot counter; each contains an empty inline asm whose operand constraint names a register ({ax} / N{dx}), and the second loop has no exit.
+define dso_local void @pr76416() {
+; CHECK-LABEL: pr76416:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jg .LBB0_3
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    incl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jle .LBB0_2
+; CHECK-NEXT:  .LBB0_3: # %for.end
+; CHECK-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq load_p@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movq load_data@GOTPCREL(%rip), %rcx
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_4: # %for.cond1
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq (%rax), %rdx
+; CHECK-NEXT:    movslq -{{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT:    movzbl (%rdx,%rsi), %edx
+; CHECK-NEXT:    movb %dl, (%rcx)
+; CHECK-NEXT:    leal 1(%rsi), %edx
+; CHECK-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jmp .LBB0_4
+entry:
+  %alloca = alloca i32, align 4 ; i32 stack slot used as the counter of both loops
+  store i32 0, ptr %alloca, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %load.from.alloca.0 = load i32, ptr %alloca, align 4
+  %cmp = icmp slt i32 %load.from.alloca.0, 4 ; first loop continues while the counter is < 4
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) nounwind ; empty asm; i8 0 is bound to the {ax} constraint
+  %load.from.alloca.1 = load i32, ptr %alloca, align 4
+  %inc = add nsw i32 %load.from.alloca.1, 1
+  store i32 %inc, ptr %alloca, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  store i32 0, ptr %alloca, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.cond1, %for.end
+  call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) nounwind ; empty asm; poison operand with the N{dx} constraint
+  %load.from.load_p = load ptr, ptr @load_p, align 8
+  %regs = getelementptr inbounds { [4 x i8] }, ptr %load.from.load_p, i32 0, i32 0
+  %load.from.alloca.2 = load i32, ptr %alloca, align 4
+  %idxprom = sext i32 %load.from.alloca.2 to i64
+  %arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom
+  %load.with.gep.ptr = load i8, ptr %arrayidx, align 1
+  store i8 %load.with.gep.ptr, ptr @load_data, align 1
+  %load.from.alloca.3 = load i32, ptr %alloca, align 4
+  %inc2 = add nsw i32 %load.from.alloca.3, 1
+  store i32 %inc2, ptr %alloca, align 4
+  br label %for.cond1 ; unconditional back edge: this loop has no exit
+}
diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir b/llvm/test/CodeGen/X86/subreg-fail.mir
index c8146f099b814..dc690719e8581 100644
--- a/llvm/test/CodeGen/X86/subreg-fail.mir
+++ b/llvm/test/CodeGen/X86/subreg-fail.mir
@@ -14,8 +14,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test1
-    ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
-    ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm]] :: (volatile load (s32) from `ptr undef`)
+    ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm1]] :: (volatile load (s32) from `ptr undef`)
     ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags
     ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg
     ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags
diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
new file mode 100644
index 0000000000000..ff946b76e8f61
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
@@ -0,0 +1,451 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines
+
+# We cannot lose the liveness of the high subregister of %1 when
+# coalesced with %0, so introduce an implicit-def of the super
+# register on the MOV.
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64
+    ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+    ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+    ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: RET 0
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %0:gr32 = MOV32r0 implicit-def dead $eflags
+    %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    $rdi = COPY %1
+    CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    RET 0
+
+...
+
+---
+name: subreg_to_reg_folds_to_undef
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $rax
+
+    ; CHECK-LABEL: name: subreg_to_reg_folds_to_undef
+    ; CHECK: liveins: $rax
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_with_sub_8bit = COPY $rax
+    ; CHECK-NEXT: undef [[MOV32rr:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rr [[COPY]].sub_32bit, implicit-def [[MOV32rr]]
+    ; CHECK-NEXT: RET 0, implicit [[MOV32rr]]
+    %0:gr64 = COPY killed $rax
+    %1:gr32 = COPY killed %0.sub_32bit
+    %2:gr32 = MOV32rr killed %1
+    %3:gr64 = SUBREG_TO_REG 0, killed %2, %subreg.sub_32bit
+    %4:gr64 = COPY killed %3
+    RET 0, implicit %4
+
+...
+
+---
+name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
+    ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+    ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+    ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: RET 0
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+    %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
+    $rdi = COPY %1
+    CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
+    ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+    ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+    ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: RET 0
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+    %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
+    $rdi = COPY %1
+    CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
+    ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_]].sub_8bit, implicit-def [[MOV32r0_]]
+    ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit [[MOV32r0_]]
+    ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: RET 0
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %0:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef %0.sub_8bit
+    %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    INLINEASM &"", 0, implicit %1
+    CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    RET 0
+
+...
+
+
+# Reduced realistic case which was asserting after introducing new implicit-defs
+---
+name: coalesce_needs_implicit_defs
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: coalesce_needs_implicit_defs
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $rdi
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr64 = COPY $rdi
+  ; CHECK-NEXT:   undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+  ; CHECK-NEXT:   undef [[MOV32r0_1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+  ; CHECK-NEXT:   TEST64rr [[MOV32r0_1]], [[MOV32r0_1]], implicit-def $eflags
+  ; CHECK-NEXT:   [[MOV32r0_2:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK-NEXT:   dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+  ; CHECK-NEXT:   CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+  ; CHECK-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK-NEXT:   [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32r0_2]], 4, implicit-def dead $eflags
+  ; CHECK-NEXT:   [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[MOV32r0_2]], [[COPY]], implicit-def dead $eflags
+  ; CHECK-NEXT:   [[MOV32r0_1:%[0-9]+]]:gr64_with_sub_8bit = COPY [[MOV32r0_2]]
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  bb.0:
+    liveins: $rdi
+
+    %0:gr64 = COPY killed $rdi
+    %1:gr32 = MOV32r0 implicit-def dead $eflags
+    %2:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
+    %3:gr64 = COPY killed %2
+
+  bb.1:
+    %4:gr64 = COPY killed %3
+    %5:gr32 = MOV32r0 implicit-def dead $eflags
+    TEST64rr killed %4, %4, implicit-def $eflags
+    %6:gr8 = SETCCr 4, implicit killed $eflags
+    %7:gr32 = COPY killed %5
+    %7.sub_8bit:gr32 = COPY killed %6
+    %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %9:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
+    $rdi = COPY %9
+    CALL64r killed %9, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %10:gr64 = COPY killed %8
+    %10:gr64 = SHL64ri %10, 4, implicit-def dead $eflags
+    %11:gr64 = COPY killed %10
+    %11:gr64 = ADD64rr %11, %0, implicit-def dead $eflags
+    %3:gr64 = COPY killed %11
+    JMP_1 %bb.1
+
+...
+
+# Make sure to add the 'undef' flag to the result register %2,
+# because the top 32 bits are not defined.
+---
+name: coalesce_add_implicitdef_and_undef
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: coalesce_add_implicitdef_and_undef
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $eflags, $edx
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = ADD32ri [[COPY]].sub_32bit, -34, implicit-def $eflags, implicit-def [[COPY]]
+  ; CHECK-NEXT:   FAKE_USE [[COPY]]
+  ; CHECK-NEXT:   RET 0
+  bb.0:
+    liveins: $eflags, $edx
+    %0:gr32 = COPY $edx
+    JMP_1 %bb.1
+
+  bb.1:
+    %1:gr32 = COPY %0
+    %1:gr32 = ADD32ri %1, -34, implicit-def $eflags
+    %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %1, %subreg.sub_32bit
+    FAKE_USE %2
+    RET 0
+...
+
+# We can't mark the destination register as 'undef' or add implicit-def
+# because the top 24 bits of %0:gr32 are retained by the SUBREG_TO_REG.
+#
+# For example, if this were to result in:
+#
+#     undef %2.sub_32bit:gr64_with_sub_8bit = COPY $edx
+#     %1:gr8 = SETCCr 4, implicit $eflags
+#     JMP_1 %bb.1
+#
+#   bb.1:
+#     undef %2.sub_8bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
+#
+# Then this says that the top 56 bits of %2 are undef. That's not correct
+# because only the top 32 bits are undef.
+---
+name: coalesce_dont_add_implicitdef_or_undef
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: coalesce_dont_add_implicitdef_or_undef
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $eflags, $edx
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx
+  ; CHECK-NEXT:   [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = COPY [[SETCCr]]
+  ; CHECK-NEXT:   FAKE_USE [[COPY]]
+  ; CHECK-NEXT:   RET 0
+  bb.0:
+    liveins: $eflags, $edx
+    %0:gr32 = COPY $edx
+    %1:gr8 = SETCCr 4, implicit killed $eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    %0.sub_8bit:gr32 = COPY %1
+    %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    FAKE_USE %2
+    RET 0
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
+    ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+    ; CHECK-NEXT: CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: RET 0
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %0:gr32 = MOV32r0 implicit-def dead $eflags
+    $rdi = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $eax
+    ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
+    ; CHECK: liveins: $eax
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, $eax, %subreg.sub_32bit
+    ; CHECK-NEXT: $rdi = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: CALL64r [[SUBREG_TO_REG]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: RET 0
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    $eax = MOV32r0 implicit-def dead $eflags
+    %1:gr64 = SUBREG_TO_REG 0, killed $eax, %subreg.sub_32bit
+    $rdi = COPY %1
+    CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    RET 0
+
+...
+
+# Coalesced instruction is a copy with other implicit operands
+---
+name: coalesce_copy_into_subreg_to_reg64
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $eax
+    ; CHECK-LABEL: name: coalesce_copy_into_subreg_to_reg64
+    ; CHECK: liveins: $eax
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $eax, implicit-def dead $eflags, implicit-def [[COPY]]
+    ; CHECK-NEXT: $rdi = COPY [[COPY]]
+    ; CHECK-NEXT: CALL64r [[COPY]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: RET 0
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %0:gr32 = COPY $eax, implicit-def dead $eflags
+    %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    $rdi = COPY %1
+    CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
+    ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+    ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef [[MOV32r0_]].sub_32bit, implicit [[MOV32r0_]].sub_32bit, implicit-def [[MOV32r0_]]
+    ; CHECK-NEXT: $rdi = COPY [[MOV32r0_]]
+    ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK-NEXT: RET 0
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %0:gr32 = MOV32r0 implicit-def dead $eflags
+    INLINEASM &"", 0, implicit-def %0, implicit %0
+    %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    $rdi = COPY %1
+    CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
+  ; CHECK-NEXT:   JCC_1 %bb.1, 4, implicit undef $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $rdi = COPY %1
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK-NEXT:   CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+  ; CHECK-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK-NEXT:   RET 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  bb.0:
+    INLINEASM &"", 0, implicit-def %0:gr32
+    JCC_1 %bb.1, 4, implicit undef $eflags
+    JMP_1 %bb.2
+
+  bb.1:
+    %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    $rdi = COPY %1
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    RET 0
+
+  bb.2:
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
+frameInfo:
+  adjustsStack: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
+  ; CHECK-NEXT:   JCC_1 %bb.1, 4, implicit undef $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $rdi = COPY %1
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK-NEXT:   CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+  ; CHECK-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  bb.0:
+
+    INLINEASM &"", 0, implicit-def %0:gr32
+    JCC_1 %bb.1, 4, implicit undef $eflags
+    JMP_1 %bb.2
+
+  bb.1:
+    %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32bit
+    $rdi = COPY %1
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    JMP_1 %bb.1
+
+  bb.2:
+
+...

From ccd2c3e3202d25f39775a39d1565522481a14565 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Mon, 24 Nov 2025 16:13:20 +0000
Subject: [PATCH 04/37] Revert "[libcxx][ci] Temporarily disable ARM jobs"
 (#169352)

Reverts llvm/llvm-project#169318

Our builders are back online. I see them picking up existing jobs.
---
 libcxx/utils/ci/buildkite-pipeline.yml | 115 ++++++++++++-------------
 1 file changed, 57 insertions(+), 58 deletions(-)

diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml
index 8b77a06323e3d..2ac69c38ebffa 100644
--- a/libcxx/utils/ci/buildkite-pipeline.yml
+++ b/libcxx/utils/ci/buildkite-pipeline.yml
@@ -33,64 +33,63 @@ definitions:
       - "**/CMakeOutput.log"
 
 steps:
-# Linaro's ARM builders are temporarily offline.
-#- group: ARM
-#  steps:
-#  - label: AArch64
-#    command: libcxx/utils/ci/run-buildbot aarch64
-#    agents:
-#      queue: libcxx-builders-linaro-arm
-#      arch: aarch64
-#    <<: *common
-#
-#  - label: AArch64 -fno-exceptions
-#    command: libcxx/utils/ci/run-buildbot aarch64-no-exceptions
-#    agents:
-#      queue: libcxx-builders-linaro-arm
-#      arch: aarch64
-#    <<: *common
-#
-#  - label: Armv8
-#    command: libcxx/utils/ci/run-buildbot armv8
-#    agents:
-#      queue: libcxx-builders-linaro-arm
-#      arch: armv8l
-#    <<: *common
-#
-#  - label: Armv8 -fno-exceptions
-#    command: libcxx/utils/ci/run-buildbot armv8-no-exceptions
-#    agents:
-#      queue: libcxx-builders-linaro-arm
-#      arch: armv8l
-#    <<: *common
-#
-#  - label: Armv7
-#    command: libcxx/utils/ci/run-buildbot armv7
-#    agents:
-#      queue: libcxx-builders-linaro-arm
-#      arch: armv8l
-#    <<: *common
-#
-#  - label: Armv7 -fno-exceptions
-#    command: libcxx/utils/ci/run-buildbot armv7-no-exceptions
-#    agents:
-#      queue: libcxx-builders-linaro-arm
-#      arch: armv8l
-#    <<: *common
-#
-#  - label: Armv7-M picolibc
-#    command: libcxx/utils/ci/run-buildbot armv7m-picolibc
-#    agents:
-#      queue: libcxx-builders-linaro-arm
-#      arch: aarch64
-#    <<: *common
-#
-#  - label: Armv7-M picolibc -fno-exceptions
-#    command: libcxx/utils/ci/run-buildbot armv7m-picolibc-no-exceptions
-#    agents:
-#      queue: libcxx-builders-linaro-arm
-#      arch: aarch64
-#    <<: *common
+- group: ARM
+  steps:
+  - label: AArch64
+    command: libcxx/utils/ci/run-buildbot aarch64
+    agents:
+      queue: libcxx-builders-linaro-arm
+      arch: aarch64
+    <<: *common
+
+  - label: AArch64 -fno-exceptions
+    command: libcxx/utils/ci/run-buildbot aarch64-no-exceptions
+    agents:
+      queue: libcxx-builders-linaro-arm
+      arch: aarch64
+    <<: *common
+
+  - label: Armv8
+    command: libcxx/utils/ci/run-buildbot armv8
+    agents:
+      queue: libcxx-builders-linaro-arm
+      arch: armv8l
+    <<: *common
+
+  - label: Armv8 -fno-exceptions
+    command: libcxx/utils/ci/run-buildbot armv8-no-exceptions
+    agents:
+      queue: libcxx-builders-linaro-arm
+      arch: armv8l
+    <<: *common
+
+  - label: Armv7
+    command: libcxx/utils/ci/run-buildbot armv7
+    agents:
+      queue: libcxx-builders-linaro-arm
+      arch: armv8l
+    <<: *common
+
+  - label: Armv7 -fno-exceptions
+    command: libcxx/utils/ci/run-buildbot armv7-no-exceptions
+    agents:
+      queue: libcxx-builders-linaro-arm
+      arch: armv8l
+    <<: *common
+
+  - label: Armv7-M picolibc
+    command: libcxx/utils/ci/run-buildbot armv7m-picolibc
+    agents:
+      queue: libcxx-builders-linaro-arm
+      arch: aarch64
+    <<: *common
+
+  - label: Armv7-M picolibc -fno-exceptions
+    command: libcxx/utils/ci/run-buildbot armv7m-picolibc-no-exceptions
+    agents:
+      queue: libcxx-builders-linaro-arm
+      arch: aarch64
+    <<: *common
 
 - group: AIX
   steps:

From e442c67a2c98a3e1e3bfcf90aaa82ba70fb92760 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu@gmail.com>
Date: Mon, 24 Nov 2025 17:22:04 +0100
Subject: [PATCH 05/37] [clangd] Fix C++20 build failure

---
 clang-tools-extra/clangd/SemanticSelection.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang-tools-extra/clangd/SemanticSelection.cpp b/clang-tools-extra/clangd/SemanticSelection.cpp
index c2dad53bcec6b..dc61a40d733f8 100644
--- a/clang-tools-extra/clangd/SemanticSelection.cpp
+++ b/clang-tools-extra/clangd/SemanticSelection.cpp
@@ -222,6 +222,9 @@ class PragmaRegionFinder {
   }
 
   void operator()(const DirectiveTree::Conditional &C) {
+    // C.Branches needs to see the DirectiveTree definition; otherwise the
+    // build fails in C++20.
+    [[maybe_unused]] DirectiveTree Dummy;
     for (const auto &[_, SubTree] : C.Branches)
       walk(SubTree);
   }

From dc39fa34c3e27650bd111357d77247592b14baef Mon Sep 17 00:00:00 2001
From: Erich Keane <ekeane@nvidia.com>
Date: Mon, 24 Nov 2025 08:26:22 -0800
Subject: [PATCH 06/37] [OpenACC][CIR] copyout clause lowering on func-local
 declare (#169350)

This is identical to 'copy' and 'copyin', except it uses 'create' and
'copyout' as its entry/exit op. This patch adds the same tests, and
similar code for all of it.
---
 clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp   |  10 +
 clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp |  10 +-
 .../CIR/CodeGenOpenACC/declare-copyout.cpp    | 199 ++++++++++++++++++
 3 files changed, 216 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp

diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
index 40888e7326659..06be17b61c833 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
@@ -65,6 +65,16 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup {
           createOutOp<mlir::acc::DeleteOp>(cgf, copyin);
           break;
         }
+      } else if (auto create = val.getDefiningOp<mlir::acc::CreateOp>()) {
+        switch (create.getDataClause()) {
+        default:
+          cgf.cgm.errorNYI(declareRange,
+                           "OpenACC local declare clause create cleanup");
+          break;
+        case mlir::acc::DataClause::acc_copyout:
+          createOutOp<mlir::acc::CopyoutOp>(cgf, create);
+          break;
+        }
       } else if (val.getDefiningOp<mlir::acc::DeclareLinkOp>()) {
         // Link has no exit clauses, and shouldn't be copied.
         continue;
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 1e7a332d1dc22..06098919cb1b0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -853,12 +853,16 @@ class OpenACCClauseCIREmitter final
             var, mlir::acc::DataClause::acc_copyout, clause.getModifierList(),
             /*structured=*/false,
             /*implicit=*/false);
+    } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) {
+      for (const Expr *var : clause.getVarList())
+        addDataOperand<mlir::acc::CreateOp>(
+            var, mlir::acc::DataClause::acc_copyout, clause.getModifierList(),
+            /*structured=*/true,
+            /*implicit=*/false);
     } else if constexpr (isCombinedType<OpTy>) {
       applyToComputeOp(clause);
     } else {
-      // TODO: When we've implemented this for everything, switch this to an
-      // unreachable. declare construct remains.
-      return clauseNotImplemented(clause);
+      llvm_unreachable("Unknown construct kind in VisitCopyOutClause");
     }
   }
 
diff --git a/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp b/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp
new file mode 100644
index 0000000000000..1d79cef894d5e
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp
@@ -0,0 +1,199 @@
+// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
+
+struct HasSideEffects {
+  HasSideEffects();
+  ~HasSideEffects();
+};
+
+// TODO: OpenACC: Implement 'global', NS lowering.
+
+struct Struct {
+  static const HasSideEffects StaticMemHSE;
+  static const HasSideEffects StaticMemHSEArr[5];
+  static const int StaticMemInt;
+
+  // TODO: OpenACC: Implement static-local lowering.
+
+  void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr<!rec_Struct>{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: cir.alloca{{.*}}["this"
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.load
+
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    int LocalInt;
+
+#pragma acc declare copyout(always:ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1])
+    // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "ArgInt"} 
+    // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    //
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) to varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) {modifiers = #acc<data_clause_modifier always>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[ARG_INT_CREATE]] : !cir.ptr<!s32i>) to varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) {modifiers = #acc<data_clause_modifier always>, name = "ArgInt"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) to varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) {modifiers = #acc<data_clause_modifier always>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[LOC_INT_CREATE]] : !cir.ptr<!s32i>) to varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) {modifiers = #acc<data_clause_modifier always>, name = "LocalInt"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) to varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) {modifiers = #acc<data_clause_modifier always>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) to varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) {modifiers = #acc<data_clause_modifier always>, name = "LocalHSEArr[1:1]"}
+  }
+  void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr);
+};
+
+void use() {
+  Struct s;
+  s.MemFunc1(HasSideEffects{}, 0, nullptr);
+}
+
+void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr<!rec_Struct>{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: cir.alloca{{.*}}["this"
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.load
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    // CHECK: do {
+    // CHECK: } while {
+    // CHECK: }
+    int LocalInt;
+#pragma acc declare copyout(alwaysout:ArgHSE, ArgInt, ArgHSEPtr[1:1])
+    // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier alwaysout>, name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier alwaysout>, name = "ArgInt"} 
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier alwaysout>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+
+#pragma acc declare copyout(alwaysout:LocalHSE, LocalInt, LocalHSEArr[1:1])
+    // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier alwaysout>, name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier alwaysout>, name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier alwaysout>, name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) to varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) {modifiers = #acc<data_clause_modifier alwaysout>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[LOC_INT_CREATE]] : !cir.ptr<!s32i>) to varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) {modifiers = #acc<data_clause_modifier alwaysout>, name = "LocalInt"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) to varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) {modifiers = #acc<data_clause_modifier alwaysout>, name = "LocalHSEArr[1:1]"}
+    //
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+    // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) to varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) {modifiers = #acc<data_clause_modifier alwaysout>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[ARG_INT_CREATE]] : !cir.ptr<!s32i>) to varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) {modifiers = #acc<data_clause_modifier alwaysout>, name = "ArgInt"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) to varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) {modifiers = #acc<data_clause_modifier alwaysout>, name = "ArgHSEPtr[1:1]"}
+}
+
+extern "C" void do_thing();
+
+extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    // CHECK: do {
+    // CHECK: } while {
+    // CHECK: }
+    int LocalInt;
+#pragma acc declare copyout(always:ArgHSE, ArgInt, ArgHSEPtr[1:1])
+    // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "ArgInt"} 
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+    {
+      // CHECK-NEXT: cir.scope {
+#pragma acc declare copyout(LocalHSE, LocalInt, LocalHSEArr[1:1])
+    // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {dataClause = #acc<data_clause acc_copyout>, name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {dataClause = #acc<data_clause acc_copyout>, name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+
+    do_thing();
+    // CHECK-NEXT: cir.call @do_thing
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) to varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) {name = "LocalHSE"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[LOC_INT_CREATE]] : !cir.ptr<!s32i>) to varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) {name = "LocalInt"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) to varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) {name = "LocalHSEArr[1:1]"}
+    }
+    // CHECK-NEXT: }
+
+    // Make sure that cleanup gets put in the right scope.
+    do_thing();
+    // CHECK-NEXT: cir.call @do_thing
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+ 
+    // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) to varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) {modifiers = #acc<data_clause_modifier always>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[ARG_INT_CREATE]] : !cir.ptr<!s32i>) to varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) {modifiers = #acc<data_clause_modifier always>, name = "ArgInt"}
+    // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) to varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) {modifiers = #acc<data_clause_modifier always>, name = "ArgHSEPtr[1:1]"}
+}
+

From 870f581f702e6bb85c59670492c9998aacc3dacf Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Mon, 24 Nov 2025 17:26:34 +0100
Subject: [PATCH 07/37] [libc++] Disable header deprecations until #168041 is
 landed (#169305)

The `#warning` causes diagnostics if system headers include deprecated
headers. #168041 will add a way to deprecate headers properly, which
then also interacts nicely with system header suppression.
---
 libcxx/include/__config                                    | 5 ++++-
 libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp  | 5 +++++
 libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp   | 5 +++++
 libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp | 5 +++++
 libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp  | 5 +++++
 libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp   | 5 +++++
 6 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index d79ace0cbb896..1b27f28f9ddef 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -546,7 +546,10 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_DEPRECATED_(m)
 #  endif
 
-#  if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
+// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be
+// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that
+// allows suppression in system headers.
+#  if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) && 0
 #    define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 1
 #  else
 #    define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
index 8df89d0ba9206..900ca0e5e1c5e 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
@@ -14,6 +14,11 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
+// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be
+// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that
+// allows suppression in system headers.
+// XFAIL: *
+
 #include <ccomplex>
 
 // expected-warning@ccomplex:* {{<ccomplex> is deprecated in C++17 and removed in C++20. Include <complex> instead.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
index 32b57033331c8..a1ca842bc62ab 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
@@ -14,6 +14,11 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: clang-modules-build
 
+// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be
+// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that
+// allows suppression in system headers.
+// XFAIL: *
+
 #include <ciso646>
 
 // expected-warning@ciso646:* {{<ciso646> is removed in C++20. Include <version> instead.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
index 23a7709a9d658..503a87658ac02 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
@@ -14,6 +14,11 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
+// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be
+// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that
+// allows suppression in system headers.
+// XFAIL: *
+
 #include <cstdalign>
 
 // expected-warning@cstdalign:* {{<cstdalign> is deprecated in C++17 and removed in C++20.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
index c2c0f03c52d3c..80025c5ab72d2 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
@@ -14,6 +14,11 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
+// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be
+// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that
+// allows suppression in system headers.
+// XFAIL: *
+
 #include <cstdbool>
 
 // expected-warning@cstdbool:* {{<cstdbool> is deprecated in C++17 and removed in C++20.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
index 4f5564915443d..07bdd29648a68 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
@@ -14,6 +14,11 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
+// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be
+// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that
+// allows suppression in system headers.
+// XFAIL: *
+
 #include <ctgmath>
 
 // expected-warning@ctgmath:* {{<ctgmath> is deprecated in C++17 and removed in C++20. Include <cmath> and <complex> instead.}}

From ab7145231b9d6a87d528a344456a77793c75614d Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 24 Nov 2025 17:33:51 +0100
Subject: [PATCH 08/37] [Support] Permit "default" string in AllocToken mode
 parsing (#169351)

Update getAllocTokenModeFromString() to recognize "default" as a valid
mode string, mapping it to `DefaultAllocTokenMode`.
---
 llvm/lib/Support/AllocToken.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/AllocToken.cpp b/llvm/lib/Support/AllocToken.cpp
index 8e9e89f0df353..daa40d4e9dcc6 100644
--- a/llvm/lib/Support/AllocToken.cpp
+++ b/llvm/lib/Support/AllocToken.cpp
@@ -24,6 +24,7 @@ llvm::getAllocTokenModeFromString(StringRef Name) {
       .Case("random", AllocTokenMode::Random)
       .Case("typehash", AllocTokenMode::TypeHash)
       .Case("typehashpointersplit", AllocTokenMode::TypeHashPointerSplit)
+      .Case("default", DefaultAllocTokenMode)
       .Default(std::nullopt);
 }
 

From f31e1cf012c3029ef7619db25f5074b69b550e59 Mon Sep 17 00:00:00 2001
From: Kseniya Tikhomirova <kseniya.tikhomirova@intel.com>
Date: Mon, 24 Nov 2025 17:34:01 +0100
Subject: [PATCH 09/37] [libsycl] Add Maintainers.md file (#168550)

Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova@intel.com>
---
 libsycl/Maintainers.md | 13 +++++++++++++
 llvm/Maintainers.md    |  2 ++
 2 files changed, 15 insertions(+)
 create mode 100644 libsycl/Maintainers.md

diff --git a/libsycl/Maintainers.md b/libsycl/Maintainers.md
new file mode 100644
index 0000000000000..4ffc9e87d3bdd
--- /dev/null
+++ b/libsycl/Maintainers.md
@@ -0,0 +1,13 @@
+# libsycl Maintainers
+
+This file is a list of the
+[maintainers](https://llvm.org/docs/DeveloperPolicy.html#maintainers) for
+the SYCL Runtime library.
+
+# Current Maintainers
+
+Alexey Bader \
+| alexey.bader@intel.com (email), bader (GitHub, Discord, Discourse)
+
+Kseniya Tikhomirova \
+| kseniya.tikhomirova@intel.com (email), KseniyaTikhomirova (GitHub, Discourse)
\ No newline at end of file
diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md
index b0d4e46bb508f..2f897332f40c9 100644
--- a/llvm/Maintainers.md
+++ b/llvm/Maintainers.md
@@ -511,6 +511,8 @@ Some subprojects maintain their own list of per-component maintainers.
 
 [libclc maintainers](https://github.com/llvm/llvm-project/blob/main/libclc/Maintainers.md)
 
+[libsycl maintainers](https://github.com/llvm/llvm-project/blob/main/libsycl/Maintainers.md)
+
 [LLD maintainers](https://github.com/llvm/llvm-project/blob/main/lld/Maintainers.md)
 
 [LLDB maintainers](https://github.com/llvm/llvm-project/blob/main/lldb/Maintainers.md)

From 51fef127f29fe2225358396728d95e2d9e6af75e Mon Sep 17 00:00:00 2001
From: Felipe de Azevedo Piovezan <fpiovezan@apple.com>
Date: Mon, 24 Nov 2025 16:41:09 +0000
Subject: [PATCH 10/37] [lldb] Add const& to InstructionList parameter
 (#169342)

---
 .../InstEmulation/UnwindAssemblyInstEmulation.cpp               | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
index 987586b97dfdc..b6b073a96bcad 100644
--- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
@@ -63,7 +63,7 @@ static void DumpUnwindRowsToLog(Log *log, AddressRange range,
 }
 
 static void DumpInstToLog(Log *log, Instruction &inst,
-                          InstructionList inst_list) {
+                          const InstructionList &inst_list) {
   if (!log || !log->GetVerbose())
     return;
   const bool show_address = true;

From cc0371f2a4f95614c35601f898dde7745120e8d1 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Mon, 24 Nov 2025 16:42:31 +0000
Subject: [PATCH 11/37] [AMDGPU] Use ListSeparator. NFC. (#169347)

---
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 53 ++++++-------------
 1 file changed, 17 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 8ef5874d7baf9..da287e0243d71 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -12,6 +12,7 @@
 #include "SIDefines.h"
 #include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -1341,12 +1342,9 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
     return;
 
   O << Name;
-  for (int I = 0; I < NumOps; ++I) {
-    if (I != 0)
-      O << ',';
-
-    O << !!(Ops[I] & Mod);
-  }
+  ListSeparator Sep(",");
+  for (int I = 0; I < NumOps; ++I)
+    O << Sep << !!(Ops[I] & Mod);
 
   if (HasDstSel) {
     O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
@@ -1584,14 +1582,10 @@ void AMDGPUInstPrinter::printGPRIdxMode(const MCInst *MI, unsigned OpNo,
     O << formatHex(static_cast<uint64_t>(Val));
   } else {
     O << "gpr_idx(";
-    bool NeedComma = false;
+    ListSeparator Sep(",");
     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
-      if (Val & (1 << ModeId)) {
-        if (NeedComma)
-          O << ',';
-        O << IdSymbolic[ModeId];
-        NeedComma = true;
-      }
+      if (Val & (1 << ModeId))
+        O << Sep << IdSymbolic[ModeId];
     }
     O << ')';
   }
@@ -1798,25 +1792,16 @@ void AMDGPUInstPrinter::printSWaitCnt(const MCInst *MI, unsigned OpNo,
   bool IsDefaultLgkmcnt = Lgkmcnt == getLgkmcntBitMask(ISA);
   bool PrintAll = IsDefaultVmcnt && IsDefaultExpcnt && IsDefaultLgkmcnt;
 
-  bool NeedSpace = false;
+  ListSeparator Sep(" ");
 
-  if (!IsDefaultVmcnt || PrintAll) {
-    O << "vmcnt(" << Vmcnt << ')';
-    NeedSpace = true;
-  }
+  if (!IsDefaultVmcnt || PrintAll)
+    O << Sep << "vmcnt(" << Vmcnt << ')';
 
-  if (!IsDefaultExpcnt || PrintAll) {
-    if (NeedSpace)
-      O << ' ';
-    O << "expcnt(" << Expcnt << ')';
-    NeedSpace = true;
-  }
+  if (!IsDefaultExpcnt || PrintAll)
+    O << Sep << "expcnt(" << Expcnt << ')';
 
-  if (!IsDefaultLgkmcnt || PrintAll) {
-    if (NeedSpace)
-      O << ' ';
-    O << "lgkmcnt(" << Lgkmcnt << ')';
-  }
+  if (!IsDefaultLgkmcnt || PrintAll)
+    O << Sep << "lgkmcnt(" << Lgkmcnt << ')';
 }
 
 void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo,
@@ -1832,14 +1817,10 @@ void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo,
     StringRef Name;
     unsigned Val;
     bool IsDefault;
-    bool NeedSpace = false;
+    ListSeparator Sep(" ");
     while (decodeDepCtr(Imm16, Id, Name, Val, IsDefault, STI)) {
-      if (!IsDefault || !HasNonDefaultVal) {
-        if (NeedSpace)
-          O << ' ';
-        O << Name << '(' << Val << ')';
-        NeedSpace = true;
-      }
+      if (!IsDefault || !HasNonDefaultVal)
+        O << Sep << Name << '(' << Val << ')';
     }
   } else {
     O << formatHex(Imm16);

From e3d0ac188665afe96df32bd2841f6b71b05b8790 Mon Sep 17 00:00:00 2001
From: Hristo Hristov <hghristov.rmm@gmail.com>
Date: Mon, 24 Nov 2025 18:44:44 +0200
Subject: [PATCH 12/37] [libc++][string_view] Applied `[[nodiscard]]` (#169010)

`[[nodiscard]]` should be applied to functions where discarding the
return value is most likely a correctness issue.
- https://libcxx.llvm.org/CodingGuidelines.html#apply-nodiscard-where-relevant
---
 libcxx/include/string_view                    | 137 ++++++++++--------
 .../string_view.nodiscard.verify.cpp          | 136 ++++++++++++++++-
 .../strings/string.view/nonnull.verify.cpp    |   6 +-
 3 files changed, 211 insertions(+), 68 deletions(-)

diff --git a/libcxx/include/string_view b/libcxx/include/string_view
index 5ecaa3de7deba..5dd04a9ba8479 100644
--- a/libcxx/include/string_view
+++ b/libcxx/include/string_view
@@ -362,11 +362,11 @@ public:
 #  endif
 
   // [string.view.iterators], iterators
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return cbegin(); }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return cbegin(); }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return cend(); }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return cend(); }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT {
 #  ifdef _LIBCPP_ABI_BOUNDED_ITERATORS
     return std::__make_bounded_iter(data(), data(), data() + size());
 #  else
@@ -374,7 +374,7 @@ public:
 #  endif
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT {
 #  ifdef _LIBCPP_ABI_BOUNDED_ITERATORS
     return std::__make_bounded_iter(data() + size(), data(), data() + size());
 #  else
@@ -382,51 +382,54 @@ public:
 #  endif
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator
+  rbegin() const _NOEXCEPT {
     return const_reverse_iterator(cend());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
     return const_reverse_iterator(cbegin());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator
+  crbegin() const _NOEXCEPT {
     return const_reverse_iterator(cend());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT {
     return const_reverse_iterator(cbegin());
   }
 
   // [string.view.capacity], capacity
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type length() const _NOEXCEPT { return __size_; }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type length() const _NOEXCEPT { return __size_; }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
     return numeric_limits<size_type>::max() / sizeof(value_type);
   }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return __size_ == 0; }
 
   // [string.view.access], element access
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __pos) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference
+  operator[](size_type __pos) const _NOEXCEPT {
     return _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__pos < size(), "string_view[] index out of bounds"), __data_[__pos];
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __pos) const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __pos) const {
     return __pos >= size() ? (__throw_out_of_range("string_view::at"), __data_[0]) : __data_[__pos];
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT {
     return _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string_view::front(): string is empty"), __data_[0];
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT {
     return _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string_view::back(): string is empty"), __data_[__size_ - 1];
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_pointer data() const _NOEXCEPT { return __data_; }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_pointer data() const _NOEXCEPT { return __data_; }
 
   // [string.view.modifiers], modifiers:
   _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI void remove_prefix(size_type __n) _NOEXCEPT {
@@ -459,7 +462,8 @@ public:
     return __rlen;
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view substr(size_type __pos = 0, size_type __n = npos) const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view
+  substr(size_type __pos = 0, size_type __n = npos) const {
     // Use the `__assume_valid` form of the constructor to avoid an unnecessary check. Any substring of a view is a
     // valid view. In particular, `size()` is known to be smaller than `numeric_limits<difference_type>::max()`, so the
     // new size is also smaller. See also https://llvm.org/PR91634.
@@ -474,7 +478,7 @@ public:
   }
 #  endif
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 int compare(basic_string_view __sv) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 int compare(basic_string_view __sv) const _NOEXCEPT {
     size_type __rlen = std::min(size(), __sv.size());
     int __retval     = _Traits::compare(data(), __sv.data(), __rlen);
     if (__retval == 0) // first __rlen chars matched
@@ -482,50 +486,51 @@ public:
     return __retval;
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
   compare(size_type __pos1, size_type __n1, basic_string_view __sv) const {
     return substr(__pos1, __n1).compare(__sv);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
   compare(size_type __pos1, size_type __n1, basic_string_view __sv, size_type __pos2, size_type __n2) const {
     return substr(__pos1, __n1).compare(__sv.substr(__pos2, __n2));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
   compare(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s) const _NOEXCEPT {
     return compare(basic_string_view(__s));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
   compare(size_type __pos1, size_type __n1, const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s) const {
     return substr(__pos1, __n1).compare(basic_string_view(__s));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
   compare(size_type __pos1, size_type __n1, const _CharT* __s, size_type __n2) const
       _LIBCPP_DIAGNOSE_NULLPTR_IF(__n2 != 0 && __s == nullptr, " if n2 is not zero") {
     return substr(__pos1, __n1).compare(basic_string_view(__s, __n2));
   }
 
   // find
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find(basic_string_view __s, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find(_CharT __c, size_type __pos = 0) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find(_CharT __c, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
       _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find(): received nullptr");
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find(): received nullptr");
     return std::__str_find<value_type, size_type, traits_type, npos>(
@@ -533,24 +538,24 @@ public:
   }
 
   // rfind
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   rfind(basic_string_view __s, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_rfind<value_type, size_type, traits_type, npos>(data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   rfind(_CharT __c, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_rfind<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   rfind(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
       _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::rfind(): received nullptr");
     return std::__str_rfind<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   rfind(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::rfind(): received nullptr");
     return std::__str_rfind<value_type, size_type, traits_type, npos>(
@@ -558,25 +563,25 @@ public:
   }
 
   // find_first_of
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_of(basic_string_view __s, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_of(_CharT __c, size_type __pos = 0) const _NOEXCEPT {
     return find(__c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
       _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_first_of(): received nullptr");
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_first_of(): received nullptr");
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(
@@ -584,25 +589,25 @@ public:
   }
 
   // find_last_of
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_of(basic_string_view __s, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_of(_CharT __c, size_type __pos = npos) const _NOEXCEPT {
     return rfind(__c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
       _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_last_of(): received nullptr");
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_last_of(): received nullptr");
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(
@@ -610,25 +615,25 @@ public:
   }
 
   // find_first_not_of
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_not_of(basic_string_view __s, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_not_of(_CharT __c, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_not_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
       _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_first_not_of(): received nullptr");
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_not_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_first_not_of(): received nullptr");
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(
@@ -636,25 +641,25 @@ public:
   }
 
   // find_last_not_of
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_not_of(basic_string_view __s, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_not_of(_CharT __c, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_not_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
       _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_last_not_of(): received nullptr");
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_not_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_last_not_of(): received nullptr");
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(
@@ -662,37 +667,43 @@ public:
   }
 
 #  if _LIBCPP_STD_VER >= 20
-  constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(basic_string_view __s) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(basic_string_view __s) const noexcept {
     return size() >= __s.size() && compare(0, __s.size(), __s) == 0;
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept {
     return !empty() && _Traits::eq(front(), __c);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool
+  starts_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
     return starts_with(basic_string_view(__s));
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(basic_string_view __s) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(basic_string_view __s) const noexcept {
     return size() >= __s.size() && compare(size() - __s.size(), npos, __s) == 0;
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept {
     return !empty() && _Traits::eq(back(), __c);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool
+  ends_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
     return ends_with(basic_string_view(__s));
   }
 #  endif
 
 #  if _LIBCPP_STD_VER >= 23
-  constexpr _LIBCPP_HIDE_FROM_ABI bool contains(basic_string_view __sv) const noexcept { return find(__sv) != npos; }
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(basic_string_view __sv) const noexcept {
+    return find(__sv) != npos;
+  }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept { return find(__c) != npos; }
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept {
+    return find(__c) != npos;
+  }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const {
     return find(__s) != npos;
   }
 #  endif
@@ -897,7 +908,8 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, basic_string_view<_CharT, _Trai
 // [string.view.hash]
 template <class _CharT>
 struct __string_view_hash : public __unary_function<basic_string_view<_CharT, char_traits<_CharT> >, size_t> {
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const basic_string_view<_CharT, char_traits<_CharT> > __val) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t
+  operator()(const basic_string_view<_CharT, char_traits<_CharT> > __val) const _NOEXCEPT {
     return std::__do_string_hash(__val.data(), __val.data() + __val.size());
   }
 };
@@ -924,30 +936,31 @@ struct hash<basic_string_view<wchar_t, char_traits<wchar_t> > > : __string_view_
 #  if _LIBCPP_STD_VER >= 14
 inline namespace literals {
 inline namespace string_view_literals {
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char> operator""sv(const char* __str, size_t __len) noexcept {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char>
+operator""sv(const char* __str, size_t __len) noexcept {
   return basic_string_view<char>(__str, __len);
 }
 
 #    if _LIBCPP_HAS_WIDE_CHARACTERS
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<wchar_t>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<wchar_t>
 operator""sv(const wchar_t* __str, size_t __len) noexcept {
   return basic_string_view<wchar_t>(__str, __len);
 }
 #    endif
 
 #    if _LIBCPP_HAS_CHAR8_T
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char8_t>
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char8_t>
 operator""sv(const char8_t* __str, size_t __len) noexcept {
   return basic_string_view<char8_t>(__str, __len);
 }
 #    endif
 
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char16_t>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char16_t>
 operator""sv(const char16_t* __str, size_t __len) noexcept {
   return basic_string_view<char16_t>(__str, __len);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char32_t>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char32_t>
 operator""sv(const char32_t* __str, size_t __len) noexcept {
   return basic_string_view<char32_t>(__str, __len);
 }
diff --git a/libcxx/test/libcxx/diagnostics/string_view.nodiscard.verify.cpp b/libcxx/test/libcxx/diagnostics/string_view.nodiscard.verify.cpp
index e5b2258315fe4..89e4a5b44ab48 100644
--- a/libcxx/test/libcxx/diagnostics/string_view.nodiscard.verify.cpp
+++ b/libcxx/test/libcxx/diagnostics/string_view.nodiscard.verify.cpp
@@ -12,12 +12,140 @@
 
 #include <string_view>
 
+#include "type_algorithms.h"
 #include "test_macros.h"
 
-void test() {
-  std::string_view string_view;
-  string_view.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+void test_members() {
+  std::string_view sv;
+
+  sv.begin();   // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.end();     // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.cbegin();  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.cend();    // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.rbegin();  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.rend();    // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.crbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.crend();   // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  sv.size();     // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.length();   // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.max_size(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  sv.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  sv[0];    // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.at(0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  sv.front(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.back();  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.data();  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  sv.substr(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
 #if TEST_STD_VER >= 26
-  string_view.subview(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.subview(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+#endif
+
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.compare(sv);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.compare(0, 0, sv);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.compare(0, 0, sv, 0, 0);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.compare("");
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.compare(0, 0, "");
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.compare(0, 0, "", 0);
+
+  sv.find(sv);       // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find(' ');      // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find("", 0, 0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find("", 0);    // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  sv.rfind(sv);       // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.rfind(' ');      // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.rfind("", 0, 0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.rfind("", 0);    // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_first_of(sv);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_first_of(' ');
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_first_of("", 0, 0);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_first_of("", 0);
+
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_last_of(sv);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_last_of(' ');
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_last_of("", 0, 0);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_last_of("", 0);
+
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_first_not_of(sv);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_first_not_of(' ');
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_first_not_of("", 0, 0);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_first_not_of("", 0);
+
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_last_not_of(sv);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_last_not_of(' ');
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_last_not_of("", 0, 0);
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.find_last_not_of("", 0);
+
+#if TEST_STD_VER >= 20
+  sv.starts_with(sv);  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.starts_with(' '); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.starts_with("");  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  sv.ends_with(sv);  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.ends_with(' '); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.ends_with("");  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+#endif
+
+#if TEST_STD_VER >= 23
+  sv.contains(sv);  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.contains(' '); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  sv.contains("");  // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+#endif
+}
+
+void test_nonmembers() {
+  // std::hash<>
+
+  std::hash<std::string_view> hash;
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  hash(std::string_view{});
+
+#if TEST_STD_VER >= 14
+  // string_view literals
+
+  using namespace std::string_view_literals;
+
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  ""sv; // const char*
+#  if !defined(TEST_HAS_NO_WIDE_CHARACTERS)
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  L""sv; // const wchar_t*
+#  endif
+#  if !defined(TEST_HAS_NO_CHAR8_T)
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  u8""sv; // const char8_t*
+#  endif
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  u""sv; // const char16_t*
+  // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  U""sv; // const char32_t*
 #endif
 }
diff --git a/libcxx/test/libcxx/strings/string.view/nonnull.verify.cpp b/libcxx/test/libcxx/strings/string.view/nonnull.verify.cpp
index 316c9828e0de5..ffe048730e687 100644
--- a/libcxx/test/libcxx/strings/string.view/nonnull.verify.cpp
+++ b/libcxx/test/libcxx/strings/string.view/nonnull.verify.cpp
@@ -10,8 +10,10 @@
 
 // Ensure that APIs which take a CharT* are diagnosing passing a nullptr to them
 
-// Clang 19 and AppleClang don't have diagnose_if with diagnostic flags
-// UNSUPPORTED: clang-19, apple-clang-17
+// AppleClang doesn't have diagnose_if with diagnostic flags
+// UNSUPPORTED: apple-clang-17
+
+// ADDITIONAL_COMPILE_FLAGS: -Wno-unused-result
 
 #include <string_view>
 

From 78d829857656e23a7d3bc4510baf4ddcb6fce97b Mon Sep 17 00:00:00 2001
From: Erich Keane <ekeane@nvidia.com>
Date: Mon, 24 Nov 2025 08:58:44 -0800
Subject: [PATCH 13/37] [OpenACC][CIR] 'create' clause lowering on
 func-local-declare (#169356)

This one is another that is effectively identical to copy, copyin, and
copyout, except its entry/exit ops pair is create/delete.
---
 clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp   |   3 +
 clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp |  10 +-
 .../CIR/CodeGenOpenACC/declare-create.cpp     | 199 ++++++++++++++++++
 .../openacc-not-implemented.cpp               |   7 +-
 4 files changed, 211 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenOpenACC/declare-create.cpp

diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
index 06be17b61c833..bf9ec3701e6ea 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
@@ -74,6 +74,9 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup {
         case mlir::acc::DataClause::acc_copyout:
           createOutOp<mlir::acc::CopyoutOp>(cgf, create);
           break;
+        case mlir::acc::DataClause::acc_create:
+          createOutOp<mlir::acc::DeleteOp>(cgf, create);
+          break;
         }
       } else if (val.getDefiningOp<mlir::acc::DeclareLinkOp>()) {
         // Link has no exit clauses, and shouldn't be copied.
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 06098919cb1b0..3e229d0d76917 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -879,12 +879,16 @@ class OpenACCClauseCIREmitter final
         addDataOperand<mlir::acc::CreateOp>(
             var, mlir::acc::DataClause::acc_create, clause.getModifierList(),
             /*structured=*/false, /*implicit=*/false);
+    } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) {
+      for (const Expr *var : clause.getVarList())
+        addDataOperand<mlir::acc::CreateOp>(
+            var, mlir::acc::DataClause::acc_create, clause.getModifierList(),
+            /*structured=*/true,
+            /*implicit=*/false);
     } else if constexpr (isCombinedType<OpTy>) {
       applyToComputeOp(clause);
     } else {
-      // TODO: When we've implemented this for everything, switch this to an
-      // unreachable. declare construct remains.
-      return clauseNotImplemented(clause);
+      llvm_unreachable("Unknown construct kind in VisitCreateClause");
     }
   }
 
diff --git a/clang/test/CIR/CodeGenOpenACC/declare-create.cpp b/clang/test/CIR/CodeGenOpenACC/declare-create.cpp
new file mode 100644
index 0000000000000..ef2f1de19ea96
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenACC/declare-create.cpp
@@ -0,0 +1,199 @@
+// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
+
+struct HasSideEffects {
+  HasSideEffects();
+  ~HasSideEffects();
+};
+
+// TODO: OpenACC: Implement 'global', NS lowering.
+
+struct Struct {
+  static const HasSideEffects StaticMemHSE;
+  static const HasSideEffects StaticMemHSEArr[5];
+  static const int StaticMemInt;
+
+  // TODO: OpenACC: Implement static-local lowering.
+
+  void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr<!rec_Struct>{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: cir.alloca{{.*}}["this"
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.load
+
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    int LocalInt;
+
+#pragma acc declare create(zero:ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1])
+    // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {modifiers = #acc<data_clause_modifier zero>, name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier zero>, name = "ArgInt"} 
+    // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {modifiers = #acc<data_clause_modifier zero>, name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier zero>, name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {modifiers = #acc<data_clause_modifier zero>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {modifiers = #acc<data_clause_modifier zero>, name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    //
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_CREATE]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "ArgInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_CREATE]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "LocalInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "LocalHSEArr[1:1]"}
+  }
+  void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr);
+};
+
+void use() {
+  Struct s;
+  s.MemFunc1(HasSideEffects{}, 0, nullptr);
+}
+
+void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr<!rec_Struct>{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: cir.alloca{{.*}}["this"
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.load
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    // CHECK: do {
+    // CHECK: } while {
+    // CHECK: }
+    int LocalInt;
+#pragma acc declare create(zero:ArgHSE, ArgInt, ArgHSEPtr[1:1])
+    // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {modifiers = #acc<data_clause_modifier zero>, name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier zero>, name = "ArgInt"} 
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {modifiers = #acc<data_clause_modifier zero>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+
+#pragma acc declare create(zero:LocalHSE, LocalInt, LocalHSEArr[1:1])
+    // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {modifiers = #acc<data_clause_modifier zero>, name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier zero>, name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {modifiers = #acc<data_clause_modifier zero>, name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_CREATE]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "LocalInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "LocalHSEArr[1:1]"}
+    //
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_CREATE]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "ArgInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "ArgHSEPtr[1:1]"}
+}
+
+extern "C" void do_thing();
+
+extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    // CHECK: do {
+    // CHECK: } while {
+    // CHECK: }
+    int LocalInt;
+#pragma acc declare create(zero:ArgHSE, ArgInt, ArgHSEPtr[1:1])
+    // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {modifiers = #acc<data_clause_modifier zero>, name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier zero>, name = "ArgInt"} 
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {modifiers = #acc<data_clause_modifier zero>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+    {
+      // CHECK-NEXT: cir.scope {
+#pragma acc declare create(LocalHSE, LocalInt, LocalHSEArr[1:1])
+    // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+
+    do_thing();
+    // CHECK-NEXT: cir.call @do_thing
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_CREATE]], %[[LOC_INT_CREATE]], %[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_create>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_CREATE]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, name = "LocalInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_CREATE]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) {dataClause = #acc<data_clause acc_create>, name = "LocalHSEArr[1:1]"}
+    }
+    // CHECK-NEXT: }
+
+    // Make sure that cleanup gets put in the right scope.
+    do_thing();
+    // CHECK-NEXT: cir.call @do_thing
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_CREATE]], %[[ARG_INT_CREATE]], %[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+ 
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_CREATE]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_CREATE]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "ArgInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_CREATE]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero>, name = "ArgHSEPtr[1:1]"}
+}
+
diff --git a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp
index c8b85a12f84e7..43d91f180acaf 100644
--- a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp
@@ -1,8 +1,5 @@
 // RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-cir %s -o %t.cir -verify
 
-void HelloWorld(int *A) {
-  extern int *E;
-
-// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: create}}
+int E, A;
+// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Declare Construct}}
 #pragma acc declare link(E) create(A)
-}

From bab1c2971a31f032a6c353a0076d16e564ab50fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gergely=20B=C3=A1lint?= <gergely.balint@arm.com>
Date: Mon, 24 Nov 2025 18:00:58 +0100
Subject: [PATCH 14/37] [BOLT] Extend Inliner to work on functions with Pointer
 Authentication (#162458)

The inliner uses DirectSP to check if a function has instructions that
modify the SP. Exceptions are stack Push and Pop instructions.

We can also allow pointer signing and authenticating instructions.

The inliner removes the Return instructions from the inlined functions.
If it is a fused pointer-authentication-and-return (e.g. RETAA), we have
to generate a new authentication instruction.
---
 bolt/include/bolt/Core/MCPlusBuilder.h        |  6 ++
 bolt/lib/Passes/Inliner.cpp                   | 19 ++++++
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 27 ++++++++
 bolt/test/AArch64/inline-armv8.3-returns.s    | 45 ++++++++++++++
 bolt/test/AArch64/inline-armv8.3-tailcall.s   | 46 ++++++++++++++
 bolt/test/AArch64/inline-pauth-lr.s           | 61 +++++++++++++++++++
 6 files changed, 204 insertions(+)
 create mode 100644 bolt/test/AArch64/inline-armv8.3-returns.s
 create mode 100644 bolt/test/AArch64/inline-armv8.3-tailcall.s
 create mode 100644 bolt/test/AArch64/inline-pauth-lr.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 69ae4fb8ddcc9..f543d2e5d79a1 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -632,6 +632,12 @@ class MCPlusBuilder {
     return false;
   }
 
+  /// Generate the matching pointer authentication instruction from a fused
+  /// pauth-and-return instruction.
+  virtual void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) {
+    llvm_unreachable("not implemented");
+  }
+
   /// Returns the register used as a return address. Returns std::nullopt if
   /// not applicable, such as reading the return address from a system register
   /// or from the stack.
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 9b28c7efde5bf..5a7d02a34b4d8 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -195,6 +195,13 @@ InliningInfo getInliningInfo(const BinaryFunction &BF) {
         if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
           continue;
 
+        // Pointer signing and authentication instructions are used around
+        // Push and Pop. These are also straightforward to handle.
+        if (BC.isAArch64() &&
+            (BC.MIB->isPSignOnLR(Inst) || BC.MIB->isPAuthOnLR(Inst) ||
+             BC.MIB->isPAuthAndRet(Inst)))
+          continue;
+
         DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
                     BC.MIB->hasUseOfPhysReg(Inst, SPReg);
       }
@@ -338,6 +345,18 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
                                 BC.Ctx.get());
       }
 
+      // Handling fused authentication and return instructions (Armv8.3-A):
+      // if the Callee does not end in a tailcall, the return will be removed
+      // from the inlined block. If that return is RETA(A|B), we have to keep
+      // the authentication part.
+      // RETAA -> AUTIASP
+      // RETAB -> AUTIBSP
+      if (!CSIsTailCall && BC.isAArch64() && BC.MIB->isPAuthAndRet(Inst)) {
+        MCInst Auth;
+        BC.MIB->createMatchingAuth(Inst, Auth);
+        InsertII =
+            std::next(InlinedBB->insertInstruction(InsertII, std::move(Auth)));
+      }
       if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
         InsertII =
             std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index db3989d6b0b5f..d4fd4b78f382d 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -313,6 +313,33 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
            Inst.getOpcode() == AArch64::RETABSPPCr;
   }
 
+  void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) override {
+    Auth.clear();
+    Auth.setOperands(AuthAndRet.getOperands());
+    switch (AuthAndRet.getOpcode()) {
+    case AArch64::RETAA:
+      Auth.setOpcode(AArch64::AUTIASP);
+      break;
+    case AArch64::RETAB:
+      Auth.setOpcode(AArch64::AUTIBSP);
+      break;
+    case AArch64::RETAASPPCi:
+      Auth.setOpcode(AArch64::AUTIASPPCi);
+      break;
+    case AArch64::RETABSPPCi:
+      Auth.setOpcode(AArch64::AUTIBSPPCi);
+      break;
+    case AArch64::RETAASPPCr:
+      Auth.setOpcode(AArch64::AUTIASPPCr);
+      break;
+    case AArch64::RETABSPPCr:
+      Auth.setOpcode(AArch64::AUTIBSPPCr);
+      break;
+    default:
+      llvm_unreachable("Unhandled fused pauth-and-return instruction");
+    }
+  }
+
   std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const override {
     switch (Inst.getOpcode()) {
     case AArch64::PACIA:
diff --git a/bolt/test/AArch64/inline-armv8.3-returns.s b/bolt/test/AArch64/inline-armv8.3-returns.s
new file mode 100644
index 0000000000000..055b589476caf
--- /dev/null
+++ b/bolt/test/AArch64/inline-armv8.3-returns.s
@@ -0,0 +1,45 @@
+# This test checks that inlining functions with fused pointer-auth-and-return
+# instructions is properly handled by BOLT.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-all --print-inline  --print-only=_Z3barP1A  \
+# RUN: %t.exe -o %t.bolt  | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 8 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl	_Z3fooP1A
+# CHECK: ldr	x8, [x0]
+# CHECK-NEXT: ldr	w0, [x8]
+# CHECK-NEXT: autiasp
+
+	.text
+	.globl	_Z3fooP1A
+	.type	_Z3fooP1A,@function
+_Z3fooP1A:
+    paciasp
+	ldr	x8, [x0]
+	ldr	w0, [x8]
+	retaa
+	.size	_Z3fooP1A, .-_Z3fooP1A
+
+	.globl	_Z3barP1A
+	.type	_Z3barP1A,@function
+_Z3barP1A:
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	bl	_Z3fooP1A
+	mul	w0, w0, w0
+	ldp	x29, x30, [sp], #16
+	ret
+	.size	_Z3barP1A, .-_Z3barP1A
+
+	.globl	main
+	.p2align	2
+	.type	main,@function
+main:
+	mov	w0, wzr
+	ret
+	.size	main, .-main
diff --git a/bolt/test/AArch64/inline-armv8.3-tailcall.s b/bolt/test/AArch64/inline-armv8.3-tailcall.s
new file mode 100644
index 0000000000000..78e7285fbf101
--- /dev/null
+++ b/bolt/test/AArch64/inline-armv8.3-tailcall.s
@@ -0,0 +1,46 @@
+# This test checks that inlining functions with fused pointer-auth-and-return
+# instructions into a location with a tailcall is properly handled by BOLT.
+# Because _Z3barP1A ends in a tailcall, we don't remove the return instruction
+# from the inlined block. Therefore, we should see a retaa, and not an autiasp.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-all --print-inline  --print-only=_Z3barP1A  \
+# RUN: %t.exe -o %t.bolt  | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 12 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl	_Z3fooP1A
+# CHECK:      mov	x29, sp
+# CHECK-NEXT: paciasp
+# CHECK-NEXT: ldr	x8, [x0]
+# CHECK-NEXT: ldr	w0, [x8]
+# CHECK-NEXT: retaa
+
+	.text
+	.globl	_Z3fooP1A
+	.type	_Z3fooP1A,@function
+_Z3fooP1A:
+    paciasp
+	ldr	x8, [x0]
+	ldr	w0, [x8]
+	retaa
+	.size	_Z3fooP1A, .-_Z3fooP1A
+
+	.globl	_Z3barP1A
+	.type	_Z3barP1A,@function
+_Z3barP1A:
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	b _Z3fooP1A // tailcall
+	.size	_Z3barP1A, .-_Z3barP1A
+
+	.globl	main
+	.p2align	2
+	.type	main,@function
+main:
+	mov	w0, wzr
+	ret
+	.size	main, .-main
diff --git a/bolt/test/AArch64/inline-pauth-lr.s b/bolt/test/AArch64/inline-pauth-lr.s
new file mode 100644
index 0000000000000..34f05721d5ea0
--- /dev/null
+++ b/bolt/test/AArch64/inline-pauth-lr.s
@@ -0,0 +1,61 @@
+# This test checks that inlining functions with the pauth-lr variants of
+# fused pointer-auth-and-return instructions is properly handled by BOLT.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -march=armv9.5-a+pauth-lr -O0 %s -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-all --print-inline  --print-only=_Z3barP1A  \
+# RUN: %t.exe -o %t.bolt  | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 2 call sites in 2 iteration(s). Change in binary size: 16 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT:  bl	_Z3fooP1A
+# CHECK:      paciasppc
+# CHECK-NEXT: ldr	x8, [x0]
+# CHECK-NEXT: ldr	w0, [x8]
+# CHECK-NEXT: autiasppcr x28
+# CHECK-NEXT: paciasppc
+# CHECK-NEXT: ldr	x7, [x0]
+# CHECK-NEXT: ldr	w0, [x7]
+# CHECK-NEXT: autiasppc _Z3bazP1A
+
+	.text
+	.globl	_Z3fooP1A
+	.type	_Z3fooP1A,@function
+_Z3fooP1A:
+    paciasppc
+	ldr	x8, [x0]
+	ldr	w0, [x8]
+	retaasppcr x28
+	.size	_Z3fooP1A, .-_Z3fooP1A
+
+	.text
+	.globl	_Z3bazP1A
+	.type	_Z3bazP1A,@function
+_Z3bazP1A:
+0:
+    paciasppc
+	ldr	x7, [x0]
+	ldr	w0, [x7]
+	retaasppc 0b
+	.size	_Z3bazP1A, .-_Z3bazP1A
+
+	.globl	_Z3barP1A
+	.type	_Z3barP1A,@function
+_Z3barP1A:
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	bl	_Z3fooP1A
+	bl	_Z3bazP1A
+	mul	w0, w0, w0
+	ldp	x29, x30, [sp], #16
+	ret
+	.size	_Z3barP1A, .-_Z3barP1A
+
+	.globl	main
+	.p2align	2
+	.type	main,@function
+main:
+	mov	w0, wzr
+	ret
+	.size	main, .-main

From 23907a20a5fa5c6e065b73f4515a2a072675dad5 Mon Sep 17 00:00:00 2001
From: Lucas Ste <38472950+LucasSte@users.noreply.github.com>
Date: Mon, 24 Nov 2025 14:02:50 -0300
Subject: [PATCH 15/37] [BPF] Allow libcalls behind a feature gate (#168442)

**Problem**

In Rust, checked math functions (like `checked_mul`, `overflowing_mul`,
`saturating_mul`) are part of the primitive implementation of integers
([see u64](https://doc.rust-lang.org/std/primitive.u64.html) for
instance). The Rust compiler builds the Rust
[compiler-builtins](https://github.com/rust-lang/compiler-builtins)
crate as a step in the compilation process, since it contains the math
builtins to be lowered in the target.

For BPF, however, when using those functions in Rust we hit the
following errors:

```
ERROR llvm: <unknown>:0:0: in function func i64 (i64, i64): A call to built-in function '__multi3' is not supported.

ERROR llvm: <unknown>:0:0: in function func i64 (i64, i64): only small returns supported
```

Those errors come from the following code:

```
pub fn func(a: u64, b: u64) -> u64 {
    a.saturating_mul(b)
}
```

Under the hood, those functions invoke the LLVM intrinsic `{ i64, i1 }
@llvm.umul.with.overflow.i64(i64, i64)` or one of its variants.

It is very useful to use safe math operations when writing BPF code in
Rust, and I would like to add support for those in the target.

**Changes**

1. Create a target feature `allow-builtin-calls` to enable code
generation for builtin functions.
2. Implement `CanLowerReturn` to fix the error `only small returns
supported`.
3. Add code to correctly invoke lib functions.
4. Add a test case together with the corresponding C code.
---
 llvm/lib/Target/BPF/BPF.td               |  4 +++
 llvm/lib/Target/BPF/BPFISelLowering.cpp  | 23 ++++++++++++--
 llvm/lib/Target/BPF/BPFISelLowering.h    | 10 ++++++
 llvm/lib/Target/BPF/BPFSubtarget.cpp     |  1 +
 llvm/lib/Target/BPF/BPFSubtarget.h       |  3 ++
 llvm/test/CodeGen/BPF/atomic-oversize.ll |  2 --
 llvm/test/CodeGen/BPF/builtin_calls.ll   | 39 ++++++++++++++++++++++++
 llvm/test/CodeGen/BPF/struct_ret1.ll     |  2 +-
 llvm/test/CodeGen/BPF/struct_ret2.ll     |  2 +-
 9 files changed, 79 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/BPF/builtin_calls.ll

diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td
index a7aa6274f5ac1..436b7eef600e7 100644
--- a/llvm/lib/Target/BPF/BPF.td
+++ b/llvm/lib/Target/BPF/BPF.td
@@ -31,6 +31,10 @@ def MisalignedMemAccess : SubtargetFeature<"allows-misaligned-mem-access",
                                            "AllowsMisalignedMemAccess", "true",
                                            "Allows misaligned memory access">;
 
+def AllowBuiltinCall : SubtargetFeature<"allow-builtin-calls",
+                                        "AllowBuiltinCalls", "true",
+                                        "Allow calls to builtin functions">;
+
 def : Proc<"generic", []>;
 def : Proc<"v1", []>;
 def : Proc<"v2", []>;
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index a8d1faa85116b..4485c41b4c0fa 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -208,6 +208,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
   HasMovsx = STI.hasMovsx();
 
   AllowsMisalignedMemAccess = STI.getAllowsMisalignedMemAccess();
+  AllowBuiltinCalls = STI.getAllowBuiltinCalls();
 }
 
 bool BPFTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
@@ -567,9 +568,10 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     if (StringRef(E->getSymbol()) != BPF_TRAP) {
       Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
-      fail(CLI.DL, DAG,
-           Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
-                 "' is not supported."));
+      if (!AllowBuiltinCalls)
+        fail(CLI.DL, DAG,
+             Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
+                   "' is not supported."));
     }
   }
 
@@ -1196,3 +1198,18 @@ bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
 
   return true;
 }
+
+bool BPFTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
+                                                      bool IsSigned) const {
+  return IsSigned || Ty->isIntegerTy(32);
+}
+
+bool BPFTargetLowering::CanLowerReturn(
+    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
+    const Type *RetTy) const {
+  // At minimal return Outs.size() <= 1, or check valid types in CC.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+  return CCInfo.CheckReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 8607e4f8c9e69..a5036e31cb61d 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -68,6 +68,8 @@ class BPFTargetLowering : public TargetLowering {
   // Allows Misalignment
   bool AllowsMisalignedMemAccess;
 
+  bool AllowBuiltinCalls;
+
   SDValue LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
@@ -163,6 +165,14 @@ class BPFTargetLowering : public TargetLowering {
   MachineBasicBlock *
   EmitInstrWithCustomInserterLDimm64(MachineInstr &MI,
                                      MachineBasicBlock *BB) const;
+
+  // Returns true if arguments should be sign-extended in lib calls.
+  bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override;
+
+  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+                      bool IsVarArg,
+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      LLVMContext &Context, const Type *RetTy) const override;
 };
 }
 
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 726f8f4b39827..77a1a5fe7444c 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -70,6 +70,7 @@ void BPFSubtarget::initializeEnvironment() {
   HasLoadAcqStoreRel = false;
   HasGotox = false;
   AllowsMisalignedMemAccess = false;
+  AllowBuiltinCalls = false;
 }
 
 void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index 24eff862224b0..40751fc9b7454 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -70,6 +70,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
   bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm,
       HasLoadAcqStoreRel, HasGotox;
 
+  bool AllowBuiltinCalls;
+
   std::unique_ptr<CallLowering> CallLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
   std::unique_ptr<LegalizerInfo> Legalizer;
@@ -101,6 +103,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
   bool hasStoreImm() const { return HasStoreImm; }
   bool hasLoadAcqStoreRel() const { return HasLoadAcqStoreRel; }
   bool hasGotox() const { return HasGotox; }
+  bool getAllowBuiltinCalls() const { return AllowBuiltinCalls; }
 
   bool isLittleEndian() const { return IsLittleEndian; }
 
diff --git a/llvm/test/CodeGen/BPF/atomic-oversize.ll b/llvm/test/CodeGen/BPF/atomic-oversize.ll
index 187f0964d4fb8..6dc49398f091d 100644
--- a/llvm/test/CodeGen/BPF/atomic-oversize.ll
+++ b/llvm/test/CodeGen/BPF/atomic-oversize.ll
@@ -1,6 +1,4 @@
 ; RUN: llc -mtriple=bpf < %s | FileCheck %s
-; XFAIL: *
-; Doesn't currently build, with error 'only small returns supported'.
 
 define void @test(ptr %a) nounwind {
 ; CHECK-LABEL: test:
diff --git a/llvm/test/CodeGen/BPF/builtin_calls.ll b/llvm/test/CodeGen/BPF/builtin_calls.ll
new file mode 100644
index 0000000000000..18199eba7222a
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/builtin_calls.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=bpfel -mattr=+allow-builtin-calls < %s | FileCheck %s
+;
+; C code for this test case:
+;
+; long func(long a, long b) {
+;     long x;
+;     return __builtin_mul_overflow(a, b, &x);
+; }
+
+
+declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64)
+
+define noundef range(i64 0, 2) i64 @func(i64 noundef %a, i64 noundef %b) local_unnamed_addr {
+entry:
+  %0 = tail call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
+  %1 = extractvalue { i64, i1 } %0, 1
+  %conv = zext i1 %1 to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: func
+; CHECK: r4 = r2
+; CHECK: r2 = r1
+; CHECK: r3 = r2
+; CHECK: r3 s>>= 63
+; CHECK: r5 = r4
+; CHECK: r5 s>>= 63
+; CHECK: r1 = r10
+; CHECK: r1 += -16
+; CHECK: call __multi3
+; CHECK: r1 = *(u64 *)(r10 - 16)
+; CHECK: r1 s>>= 63
+; CHECK: w0 = 1
+; CHECK: r2 = *(u64 *)(r10 - 8)
+; CHECK: if r2 != r1 goto LBB0_2
+; CHECK:  # %bb.1:                                # %entry
+; CHECK: w0 = 0
+; CHECK:  LBB0_2:                                 # %entry
+; CHECK: exit
\ No newline at end of file
diff --git a/llvm/test/CodeGen/BPF/struct_ret1.ll b/llvm/test/CodeGen/BPF/struct_ret1.ll
index 40d17ec514c48..eb66a7deacb91 100644
--- a/llvm/test/CodeGen/BPF/struct_ret1.ll
+++ b/llvm/test/CodeGen/BPF/struct_ret1.ll
@@ -1,6 +1,6 @@
 ; RUN: not llc -mtriple=bpf < %s 2> %t1
 ; RUN: FileCheck %s < %t1
-; CHECK: error: <unknown>:0:0: in function bar { i64, i32 } (i32, i32, i32, i32, i32): aggregate returns are not supported
+; CHECK: error: <unknown>:0:0: in function bar { i64, i32 } (i32, i32, i32, i32, i32): stack arguments are not supported
 
 %struct.S = type { i32, i32, i32 }
 
diff --git a/llvm/test/CodeGen/BPF/struct_ret2.ll b/llvm/test/CodeGen/BPF/struct_ret2.ll
index 170d55cc29df0..a20280949215e 100644
--- a/llvm/test/CodeGen/BPF/struct_ret2.ll
+++ b/llvm/test/CodeGen/BPF/struct_ret2.ll
@@ -1,6 +1,6 @@
 ; RUN: not llc -mtriple=bpf < %s 2> %t1
 ; RUN: FileCheck %s < %t1
-; CHECK: only small returns
+; CHECK: too many arguments
 
 ; Function Attrs: nounwind uwtable
 define { i64, i32 } @foo(i32 %a, i32 %b, i32 %c) #0 {

From 38a5dd5bc7d7f59a2acfeaff3b6852337bb6704e Mon Sep 17 00:00:00 2001
From: Shota Matsubara <40222661+baramatsubonzo@users.noreply.github.com>
Date: Tue, 25 Nov 2025 02:05:51 +0900
Subject: [PATCH 16/37] Fix typo in LLVM Kaleidoscope tutorial (Chapter 2)
 (#169319)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes a minor typo in the **Kaleidoscope tutorial (Chapter
2)**.

The sentence:
“checks to see if **if** is too low”
has been corrected to:
“checks to see if **it** is too low”.

This is a documentation-only change and does not affect any semantic
behavior or code generation.

Thank you for maintaining the tutorial, and please let me know if any
further adjustments are needed.
---
 llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.rst b/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.rst
index 6aec05f29329a..ee12d983dd99e 100644
--- a/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.rst
+++ b/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.rst
@@ -453,7 +453,7 @@ starts with:
           return LHS;
 
 This code gets the precedence of the current token and checks to see if
-if is too low. Because we defined invalid tokens to have a precedence of
+it is too low. Because we defined invalid tokens to have a precedence of
 -1, this check implicitly knows that the pair-stream ends when the token
 stream runs out of binary operators. If this check succeeds, we know
 that the token is a binary operator and that it will be included in this

From ad1be4a589b3143c2a76d521bcf205d22bb22ffe Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akaylor@nvidia.com>
Date: Mon, 24 Nov 2025 09:27:38 -0800
Subject: [PATCH 17/37] [CIR] Add handling for static data members (#169134)

This adds some trivial handling to force emitting of child decls inside
C++ records.
---
 clang/include/clang/CIR/MissingFeatures.h |  1 -
 clang/lib/CIR/CodeGen/CIRGenModule.cpp    |  8 +-
 clang/test/CIR/CodeGen/static-members.cpp | 94 +++++++++++++++++++++++
 3 files changed, 100 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/static-members.cpp

diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index c99fd6f0bfcc4..7321bf4ea8963 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -240,7 +240,6 @@ struct MissingFeatures {
   static bool ctorConstLvalueToRvalueConversion() { return false; }
   static bool ctorMemcpyizer() { return false; }
   static bool cudaSupport() { return false; }
-  static bool cxxRecordStaticMembers() { return false; }
   static bool dataLayoutTypeIsSized() { return false; }
   static bool dataLayoutTypeAllocSize() { return false; }
   static bool dataLayoutTypeStoreSize() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index e09d3de5aac9a..251c99c8cd45b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -1556,10 +1556,14 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) {
     break;
 
   case Decl::ClassTemplateSpecialization:
-  case Decl::CXXRecord:
+  case Decl::CXXRecord: {
+    CXXRecordDecl *crd = cast<CXXRecordDecl>(decl);
     assert(!cir::MissingFeatures::generateDebugInfo());
-    assert(!cir::MissingFeatures::cxxRecordStaticMembers());
+    for (auto *childDecl : crd->decls())
+      if (isa<VarDecl, CXXRecordDecl, EnumDecl>(childDecl))
+        emitTopLevelDecl(childDecl);
     break;
+  }
 
   case Decl::FileScopeAsm:
     // File-scope asm is ignored during device-side CUDA compilation.
diff --git a/clang/test/CIR/CodeGen/static-members.cpp b/clang/test/CIR/CodeGen/static-members.cpp
new file mode 100644
index 0000000000000..8722dc2a2bc6f
--- /dev/null
+++ b/clang/test/CIR/CodeGen/static-members.cpp
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s -check-prefix=CIR --input-file=%t.cir
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck %s -check-prefix=LLVM --input-file=%t-cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck %s -check-prefix=OGCG --input-file=%t.ll
+
+struct HasDtor {
+  ~HasDtor();
+};
+struct S {
+  static inline HasDtor hd;
+};
+
+// CIR: cir.global linkonce_odr comdat @_ZN1S2hdE = #cir.zero : !rec_HasDtor
+
+// CIR: cir.func internal private @__cxx_global_var_init() {
+// CIR:   %[[HD:.*]] = cir.get_global @_ZN1S2hdE : !cir.ptr<!rec_HasDtor>
+// CIR:   %[[DTOR:.*]] = cir.get_global @_ZN7HasDtorD1Ev : !cir.ptr<!cir.func<(!cir.ptr<!rec_HasDtor>)>>
+// CIR:   %[[DTOR_CAST:.*]] = cir.cast bitcast %[[DTOR]] : !cir.ptr<!cir.func<(!cir.ptr<!rec_HasDtor>)>> -> !cir.ptr<!cir.func<(!cir.ptr<!void>)>>
+// CIR:   %[[HD_CAST:.*]] = cir.cast bitcast %[[HD]] : !cir.ptr<!rec_HasDtor> -> !cir.ptr<!void>
+// CIR:   %[[HANDLE:.*]] = cir.get_global @__dso_handle : !cir.ptr<i8>
+// CIR:   cir.call @__cxa_atexit(%[[DTOR_CAST]], %[[HD_CAST]], %[[HANDLE]])
+
+// LLVM: @_ZN1S2hdE = linkonce_odr global %struct.HasDtor zeroinitializer, comdat
+// LLVM: @_ZN5Outer5Inner2hdE = linkonce_odr global %struct.HasDtor zeroinitializer, comdat
+
+// LLVM: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_static_members.cpp, ptr null }]
+// LLVM: define internal void @__cxx_global_var_init()
+// LLVM:   call void @__cxa_atexit(ptr @_ZN7HasDtorD1Ev, ptr @_ZN1S2hdE, ptr @__dso_handle)
+
+// FIXME(cir): OGCG has a guard variable for this case that we don't generate in CIR.
+//             This is needed because the variable has linkonce_odr linkage.
+
+// OGCG: @_ZN1S2hdE = linkonce_odr global %struct.HasDtor zeroinitializer, comdat
+// OGCG: @_ZGVN1S2hdE = linkonce_odr global i64 0, comdat($_ZN1S2hdE)
+// OGCG: @_ZN5Outer5Inner2hdE = linkonce_odr global %struct.HasDtor zeroinitializer, comdat
+// OGCG: @_ZGVN5Outer5Inner2hdE = linkonce_odr global i64 0, comdat($_ZN5Outer5Inner2hdE)
+// OGCG: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [
+// OGCG-SAME:      { i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr @_ZN1S2hdE },
+// OGCG-SAME:      { i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init.1, ptr @_ZN5Outer5Inner2hdE }]
+
+// OGCG: define internal void @__cxx_global_var_init() {{.*}} section ".text.startup" comdat($_ZN1S2hdE) {
+// OGCG:   %[[GUARD:.*]] = load atomic i8, ptr @_ZGVN1S2hdE acquire
+// OGCG:   %[[UNINIT:.*]] = icmp eq i8 %[[GUARD]], 0
+// OGCG:   br i1 %[[UNINIT]], label %[[INIT_CHECK:.*]], label %[[INIT_END:.*]]
+// OGCG: [[INIT_CHECK:.*]]:
+// OGCG:   %[[GUARD_ACQUIRE:.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVN1S2hdE)
+// OGCG:   %[[TOBOOL:.*]] = icmp ne i32 %[[GUARD_ACQUIRE]], 0
+// OGCG:   br i1 %[[TOBOOL]], label %[[INIT:.*]], label %[[INIT_END]]
+// OGCG: [[INIT:.*]]:
+// OGCG:   %[[ATEXIT:.*]] = call i32 @__cxa_atexit(ptr @_ZN7HasDtorD1Ev, ptr @_ZN1S2hdE, ptr @__dso_handle)
+// OGCG:   call void @__cxa_guard_release(ptr @_ZGVN1S2hdE)
+// OGCG:   br label %[[INIT_END]]
+// OGCG: [[INIT_END]]:
+
+struct Outer {
+  struct Inner {
+    static inline HasDtor hd;
+  };
+};
+
+// CIR: cir.global linkonce_odr comdat @_ZN5Outer5Inner2hdE = #cir.zero : !rec_HasDtor
+// CIR: cir.func internal private @__cxx_global_var_init.1()
+// CIR:   %[[HD:.*]] = cir.get_global @_ZN5Outer5Inner2hdE : !cir.ptr<!rec_HasDtor>
+// CIR:   %[[DTOR:.*]] = cir.get_global @_ZN7HasDtorD1Ev : !cir.ptr<!cir.func<(!cir.ptr<!rec_HasDtor>)>>
+// CIR:   %[[DTOR_CAST:.*]] = cir.cast bitcast %[[DTOR]] : !cir.ptr<!cir.func<(!cir.ptr<!rec_HasDtor>)>> -> !cir.ptr<!cir.func<(!cir.ptr<!void>)>>
+// CIR:   %[[HD_CAST:.*]] = cir.cast bitcast %[[HD]] : !cir.ptr<!rec_HasDtor> -> !cir.ptr<!void>
+// CIR:   %[[HANDLE:.*]] = cir.get_global @__dso_handle : !cir.ptr<i8>
+// CIR:   cir.call @__cxa_atexit(%[[DTOR_CAST]], %[[HD_CAST]], %[[HANDLE]]) : (!cir.ptr<!cir.func<(!cir.ptr<!void>)>>, !cir.ptr<!void>, !cir.ptr<i8>) -> ()
+
+// LLVM: define internal void @__cxx_global_var_init.1()
+// LLVM:   call void @__cxa_atexit(ptr @_ZN7HasDtorD1Ev, ptr @_ZN5Outer5Inner2hdE, ptr @__dso_handle)
+
+// OGCG: define internal void @__cxx_global_var_init.1() {{.*}} section ".text.startup" comdat($_ZN5Outer5Inner2hdE) {
+// OGCG:   %[[GUARD:.*]] = load atomic i8, ptr @_ZGVN5Outer5Inner2hdE acquire
+// OGCG:   %[[UNINIT:.*]] = icmp eq i8 %[[GUARD]], 0
+// OGCG:   br i1 %[[UNINIT]], label %[[INIT_CHECK:.*]], label %[[INIT_END:.*]]
+// OGCG: [[INIT_CHECK:.*]]:
+// OGCG:   %[[GUARD_ACQUIRE:.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVN5Outer5Inner2hdE)
+// OGCG:   %[[TOBOOL:.*]] = icmp ne i32 %[[GUARD_ACQUIRE]], 0
+// OGCG:   br i1 %[[TOBOOL]], label %[[INIT:.*]], label %[[INIT_END]]
+// OGCG: [[INIT:.*]]:
+// OGCG:   %[[ATEXIT:.*]] = call i32 @__cxa_atexit(ptr @_ZN7HasDtorD1Ev, ptr @_ZN5Outer5Inner2hdE, ptr @__dso_handle)
+// OGCG:   call void @__cxa_guard_release(ptr @_ZGVN5Outer5Inner2hdE)
+// OGCG:   br label %[[INIT_END]]
+// OGCG: [[INIT_END]]:
+
+
+// CIR: cir.func private @_GLOBAL__sub_I_static_members.cpp()
+// CIR:   cir.call @__cxx_global_var_init()
+
+// LLVM: define void @_GLOBAL__sub_I_static_members.cpp()
+// LLVM:   call void @__cxx_global_var_init()

From 76e9834b2908ec550bb2ca221b7652f6a5c32c46 Mon Sep 17 00:00:00 2001
From: PMylon <pmylonas@amd.com>
Date: Mon, 24 Nov 2025 18:33:45 +0100
Subject: [PATCH 18/37] [MLIR][LLVM] Support named barrier as a global variable
 type in llvm dialect (#169194)

Enables `amdgcn.named.barrier` target extension type as a global
variable type in MLIR.
---
 mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp     | 4 ++++
 mlir/test/Target/LLVMIR/target-ext-type.mlir | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
index ce93d18f56d39..5dc4fa2b2d82f 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
@@ -667,6 +667,7 @@ LogicalResult LLVMStructType::verifyEntries(DataLayoutEntryListRef entries,
 
 static constexpr llvm::StringRef kSpirvPrefix = "spirv.";
 static constexpr llvm::StringRef kArmSVCount = "aarch64.svcount";
+static constexpr llvm::StringRef kAMDGCNNamedBarrier = "amdgcn.named.barrier";
 
 bool LLVM::LLVMTargetExtType::hasProperty(Property prop) const {
   // See llvm/lib/IR/Type.cpp for reference.
@@ -676,6 +677,9 @@ bool LLVM::LLVMTargetExtType::hasProperty(Property prop) const {
     properties |=
         (LLVMTargetExtType::HasZeroInit | LLVM::LLVMTargetExtType::CanBeGlobal);
 
+  if (getExtTypeName() == kAMDGCNNamedBarrier)
+    properties |= LLVMTargetExtType::CanBeGlobal;
+
   return (properties & prop) == prop;
 }
 
diff --git a/mlir/test/Target/LLVMIR/target-ext-type.mlir b/mlir/test/Target/LLVMIR/target-ext-type.mlir
index 6b2d2ea3d4c23..cee630163ca21 100644
--- a/mlir/test/Target/LLVMIR/target-ext-type.mlir
+++ b/mlir/test/Target/LLVMIR/target-ext-type.mlir
@@ -6,6 +6,12 @@ llvm.mlir.global external @global() {addr_space = 0 : i32} : !llvm.target<"spirv
   llvm.return %0 : !llvm.target<"spirv.DeviceEvent">
 }
 
+// CHECK: @amdgcn_named_barrier = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
+llvm.mlir.global internal @amdgcn_named_barrier() {addr_space = 3 : i32} : !llvm.target<"amdgcn.named.barrier", 0> {
+  %0 = llvm.mlir.poison : !llvm.target<"amdgcn.named.barrier", 0>
+  llvm.return %0 : !llvm.target<"amdgcn.named.barrier", 0>
+}
+
 // CHECK-LABEL: define target("spirv.Event") @func2() {
 // CHECK-NEXT:    %1 = alloca target("spirv.Event"), align 8
 // CHECK-NEXT:    %2 = load target("spirv.Event"), ptr %1, align 8

From 79c56e8f335b231d00b06c8031d5d4c31ceb7d96 Mon Sep 17 00:00:00 2001
From: Shubham Sandeep Rastogi <Shubham.Rastogi@sony.com>
Date: Mon, 24 Nov 2025 09:37:41 -0800
Subject: [PATCH 19/37] Add support for llvm.dbg.declare_value in the
 CoroSplitter pass. (#168134)

Make sure the CoroSplitter pass correctly handles `#dbg_declare_value`
intrinsics. This means it should identify them and convert them to
`#dbg_declare`s, so that subsequent passes do not need to be amended
to support the `#dbg_declare_value` intrinsic.

More information here:
https://discourse.llvm.org/t/rfc-introduce-new-llvm-dbg-coroframe-entry-intrinsic/88269

This patch is the second and last in a stack of patches, with the one
preceding it being: https://github.com/llvm/llvm-project/pull/168132
---
 llvm/lib/Transforms/Coroutines/CoroFrame.cpp  | 66 ++++++++++++++++--
 .../Transforms/Coroutines/declare-value.ll    | 68 +++++++++++++++++++
 2 files changed, 129 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/Coroutines/declare-value.ll

diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index c89af688a69ca..b07fa644baa10 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -554,6 +554,7 @@ static void cacheDIVar(FrameDataInfo &FrameData,
         DIVarCache.insert({V, (*I)->getVariable()});
     };
     CacheIt(findDVRDeclares(V));
+    CacheIt(findDVRDeclareValues(V));
   }
 }
 
@@ -1142,6 +1143,47 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
         for_each(DVRs, SalvageOne);
       }
 
+      TinyPtrVector<DbgVariableRecord *> DVRDeclareValues =
+          findDVRDeclareValues(Def);
+      // Try best to find dbg.declare_value. If the spill is a temp, there may
+      // not be a direct dbg.declare_value. Walk up the load chain to find one
+      // from an alias.
+      if (F->getSubprogram()) {
+        auto *CurDef = Def;
+        while (DVRDeclareValues.empty() && isa<LoadInst>(CurDef)) {
+          auto *LdInst = cast<LoadInst>(CurDef);
+          // Only consider ptr to ptr same type load.
+          if (LdInst->getPointerOperandType() != LdInst->getType())
+            break;
+          CurDef = LdInst->getPointerOperand();
+          if (!isa<AllocaInst, LoadInst>(CurDef))
+            break;
+          DVRDeclareValues = findDVRDeclareValues(CurDef);
+        }
+      }
+
+      auto SalvageOneCoro = [&](auto *DDI) {
+        // This dbg.declare_value is preserved for all coro-split function
+        // fragments. It will be unreachable in the main function, and
+        // processed by coro::salvageDebugInfo() by the Cloner. However, convert
+        // it to a dbg.declare to make sure future passes don't have to deal
+        // with a dbg.declare_value.
+        auto *VAM = ValueAsMetadata::get(CurrentReload);
+        Type *Ty = VAM->getValue()->getType();
+        // If the metadata type is not a pointer, emit a dbg.value instead.
+        DbgVariableRecord *NewDVR = new DbgVariableRecord(
+            ValueAsMetadata::get(CurrentReload), DDI->getVariable(),
+            DDI->getExpression(), DDI->getDebugLoc(),
+            Ty->isPointerTy() ? DbgVariableRecord::LocationType::Declare
+                              : DbgVariableRecord::LocationType::Value);
+        Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
+            NewDVR, Builder.GetInsertPoint());
+        // This dbg.declare_value is for the main function entry point.  It
+        // will be deleted in all coro-split functions.
+        coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
+      };
+      for_each(DVRDeclareValues, SalvageOneCoro);
+
       // If we have a single edge PHINode, remove it and replace it with a
       // reload from the coroutine frame. (We already took care of multi edge
       // PHINodes by normalizing them in the rewritePHIs function).
@@ -1925,7 +1967,7 @@ void coro::salvageDebugInfo(
   Function *F = DVR.getFunction();
   // Follow the pointer arithmetic all the way to the incoming
   // function argument and convert into a DIExpression.
-  bool SkipOutermostLoad = DVR.isDbgDeclare();
+  bool SkipOutermostLoad = DVR.isDbgDeclare() || DVR.isDbgDeclareValue();
   Value *OriginalStorage = DVR.getVariableLocationOp(0);
 
   auto SalvagedInfo =
@@ -1939,10 +1981,11 @@ void coro::salvageDebugInfo(
 
   DVR.replaceVariableLocationOp(OriginalStorage, Storage);
   DVR.setExpression(Expr);
-  // We only hoist dbg.declare today since it doesn't make sense to hoist
-  // dbg.value since it does not have the same function wide guarantees that
-  // dbg.declare does.
-  if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
+  // We only hoist dbg.declare and dbg.declare_value today since it doesn't make
+  // sense to hoist dbg.value since it does not have the same function wide
+  // guarantees that dbg.declare does.
+  if (DVR.getType() == DbgVariableRecord::LocationType::Declare ||
+      DVR.getType() == DbgVariableRecord::LocationType::DeclareValue) {
     std::optional<BasicBlock::iterator> InsertPt;
     if (auto *I = dyn_cast<Instruction>(Storage)) {
       InsertPt = I->getInsertionPointAfterDef();
@@ -1957,6 +2000,19 @@ void coro::salvageDebugInfo(
       InsertPt = F->getEntryBlock().begin();
     if (InsertPt) {
       DVR.removeFromParent();
+      // If there is a dbg.declare_value being reinserted, insert it as a
+      // dbg.declare instead, so that subsequent passes don't have to deal with
+      // a dbg.declare_value.
+      if (DVR.getType() == DbgVariableRecord::LocationType::DeclareValue) {
+        auto *MD = DVR.getRawLocation();
+        if (auto *VAM = dyn_cast<ValueAsMetadata>(MD)) {
+          Type *Ty = VAM->getValue()->getType();
+          if (Ty->isPointerTy())
+            DVR.Type = DbgVariableRecord::LocationType::Declare;
+          else
+            DVR.Type = DbgVariableRecord::LocationType::Value;
+        }
+      }
       (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
     }
   }
diff --git a/llvm/test/Transforms/Coroutines/declare-value.ll b/llvm/test/Transforms/Coroutines/declare-value.ll
new file mode 100644
index 0000000000000..94049c28169b9
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/declare-value.ll
@@ -0,0 +1,68 @@
+;RUN: opt -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s
+
+; CHECK:  %.debug = alloca double, align 8
+; CHECK-NEXT:    #dbg_declare(ptr %{{.*}}, !{{[0-9]+}}, !DIExpression(DW_OP_deref), !{{[0-9]+}})
+; CHECK-NEXT:  store double %{{[0-9]+}}, ptr %{{.*}}, align 8
+; CHECK-NEXT:    #dbg_declare(ptr %arg, !{{[0-9]+}}, !DIExpression(DW_OP_plus_uconst, 24), !{{[0-9]+}})
+
+; ModuleID = '/Users/srastogi/Development/llvm-project-2/llvm/test/Transforms/Coroutines/declare-value.ll'
+source_filename = "/Users/srastogi/Development/llvm-project-2/llvm/test/Transforms/Coroutines/declare-value.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-unknown"
+
+@coroutineATu = global <{ i32, i32 }> <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @coroutineA to i64), i64 ptrtoint (ptr @coroutineATu to i64)) to i32), i32 16 }>, align 8
+
+; Function Attrs: presplitcoroutine
+define swifttailcc void @coroutineA(ptr swiftasync %arg, double %0) #0 !dbg !1 {
+  %var_with_dbg_value = alloca ptr, align 8
+  %var_with_dbg_declare = alloca ptr, align 8
+    #dbg_declare(ptr %var_with_dbg_declare, !5, !DIExpression(), !7)
+    #dbg_declare_value(double %0, !5, !DIExpression(), !7)
+  %i2 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, ptr nonnull @coroutineATu)
+  %i3 = call ptr @llvm.coro.begin(token %i2, ptr null)
+  %i7 = call ptr @llvm.coro.async.resume(), !dbg !7
+  %i10 = call { ptr } (i32, ptr, ptr, ...) @llvm.coro.suspend.async.sl_p0s(i32 0, ptr %i7, ptr nonnull @__swift_async_resume_get_context, ptr nonnull @coroutineA.1, ptr %i7, i64 0, i64 0, ptr %arg), !dbg !7
+  call void @dont_optimize(ptr %var_with_dbg_value, ptr %var_with_dbg_declare), !dbg !7
+  unreachable, !dbg !7
+}
+
+define weak_odr hidden ptr @__swift_async_resume_get_context(ptr %arg) !dbg !8 {
+  ret ptr %arg, !dbg !9
+}
+
+define hidden swifttailcc void @coroutineA.1(ptr %arg, i64 %arg1, i64 %arg2, ptr %arg3) !dbg !10 {
+  ret void, !dbg !11
+}
+
+declare void @dont_optimize(ptr, ptr)
+
+; Function Attrs: nomerge nounwind
+declare ptr @llvm.coro.async.resume() #1
+
+; Function Attrs: nounwind
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+
+; Function Attrs: nounwind
+declare token @llvm.coro.id.async(i32, i32, i32, ptr) #2
+
+; Function Attrs: nomerge nounwind
+declare { ptr } @llvm.coro.suspend.async.sl_p0s(i32, ptr, ptr, ...) #1
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { nomerge nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = distinct !DISubprogram(scope: null, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4)
+!2 = distinct !DICompileUnit(language: DW_LANG_Swift, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!3 = !DIFile(filename: "blah", directory: "")
+!4 = !{}
+!5 = !DILocalVariable(scope: !1, type: !6)
+!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Klass")
+!7 = !DILocation(line: 0, scope: !1)
+!8 = distinct !DISubprogram(scope: null, spFlags: DISPFlagDefinition, unit: !2)
+!9 = !DILocation(line: 0, scope: !8)
+!10 = distinct !DISubprogram(scope: null, spFlags: DISPFlagDefinition, unit: !2)
+!11 = !DILocation(line: 0, scope: !10)
\ No newline at end of file

From c1f24a5205364686213a23182dc45df9c2383360 Mon Sep 17 00:00:00 2001
From: Charles Zablit <c_zablit@apple.com>
Date: Mon, 24 Nov 2025 18:40:45 +0100
Subject: [PATCH 20/37] [windows] improve python3.dll load check (#168864)

---
 lldb/tools/driver/Driver.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp
index 0b77e0a4929a7..48107717abd31 100644
--- a/lldb/tools/driver/Driver.cpp
+++ b/lldb/tools/driver/Driver.cpp
@@ -477,18 +477,17 @@ bool AddPythonDLLToSearchPath() {
 #endif
 
 #ifdef LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME
-/// Returns whether `python3x.dll` is in the DLL search path.
+/// Returns true if `python3x.dll` can be loaded.
 bool IsPythonDLLInPath() {
 #define WIDEN2(x) L##x
 #define WIDEN(x) WIDEN2(x)
-  WCHAR foundPath[MAX_PATH];
-  DWORD result =
-      SearchPathW(nullptr, WIDEN(LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME), nullptr,
-                  MAX_PATH, foundPath, nullptr);
+  HMODULE h = LoadLibraryW(WIDEN(LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME));
+  if (!h)
+    return false;
+  FreeLibrary(h);
+  return true;
 #undef WIDEN2
 #undef WIDEN
-
-  return result > 0;
 }
 #endif
 

From 3843a50c69063a9440ccd65ff9a167be75baf442 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Mon, 24 Nov 2025 10:01:17 -0800
Subject: [PATCH 21/37] [Clang][TypePrinter] Make printNestedNameSpecifier look
 at typedefs (#169364)

This is to resolve a regression caused by #168534.

Now when we have an anonymous object like a struct or union that has a
typedef attached, we print the typedef name instead of listing it as
anonymous.
---
 clang/lib/AST/Decl.cpp                  |  4 +++-
 clang/unittests/AST/TypePrinterTest.cpp | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 555aa5c050ffd..591457b1d66b4 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -1790,7 +1790,9 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS,
       else
         OS << *ND;
     } else if (const auto *RD = dyn_cast<RecordDecl>(DC)) {
-      if (!RD->getIdentifier())
+      if (TypedefNameDecl *TD = RD->getTypedefNameForAnonDecl())
+        OS << *TD;
+      else if (!RD->getIdentifier())
         OS << "(anonymous " << RD->getKindName() << ')';
       else
         OS << *RD;
diff --git a/clang/unittests/AST/TypePrinterTest.cpp b/clang/unittests/AST/TypePrinterTest.cpp
index 410ec021d6e72..3cadf9b265bd1 100644
--- a/clang/unittests/AST/TypePrinterTest.cpp
+++ b/clang/unittests/AST/TypePrinterTest.cpp
@@ -341,3 +341,22 @@ TEST(TypePrinter, NestedNameSpecifiers) {
         Policy.AnonymousTagLocations = false;
       }));
 }
+
+TEST(TypePrinter, NestedNameSpecifiersTypedef) {
+  constexpr char Code[] = R"cpp(
+    typedef union {
+      struct {
+        struct {
+          unsigned int baz;
+        } bar;
+      };
+    } foo;
+  )cpp";
+
+  ASSERT_TRUE(PrintedTypeMatches(
+      Code, {}, fieldDecl(hasName("bar"), hasType(qualType().bind("id"))),
+      "struct foo::(anonymous struct)::(unnamed)", [](PrintingPolicy &Policy) {
+        Policy.FullyQualifiedName = true;
+        Policy.AnonymousTagLocations = false;
+      }));
+}

From 81f4ab83eb6fbedcede35fb1b5a4d45c3e8d5c16 Mon Sep 17 00:00:00 2001
From: Erick Velez <erickvelez7@gmail.com>
Date: Mon, 24 Nov 2025 10:08:54 -0800
Subject: [PATCH 22/37] [clang-doc] Add Mustache HTML output to namespace test
 (#169107)

This patch adds Mustache HTML tests alongside the legacy HTML backend
for namespace output. This way, we can see exactly where the output
currently differs before replacing the legacy backend.

The same thing will be done for all other tests where the legacy HTML
backend is tested.
---
 .../test/clang-doc/namespace.cpp              | 99 +++++++++++++++++++
 1 file changed, 99 insertions(+)

diff --git a/clang-tools-extra/test/clang-doc/namespace.cpp b/clang-tools-extra/test/clang-doc/namespace.cpp
index 4fa11c78d37c0..28f9556b86218 100644
--- a/clang-tools-extra/test/clang-doc/namespace.cpp
+++ b/clang-tools-extra/test/clang-doc/namespace.cpp
@@ -1,6 +1,7 @@
 // RUN: rm -rf %t && mkdir -p %t
 // RUN: clang-doc --format=html --output=%t --executor=standalone %s
 // RUN: clang-doc --format=md --output=%t --executor=standalone %s
+// RUN: clang-doc --format=mustache --output=%t --executor=standalone %s
 // RUN: FileCheck %s < %t/index_json.js -check-prefix=JSON-INDEX
 // RUN: FileCheck %s < %t/@nonymous_namespace/AnonClass.html -check-prefix=HTML-ANON-CLASS-LINE
 // RUN: FileCheck %s < %t/@nonymous_namespace/AnonClass.html -check-prefix=HTML-ANON-CLASS
@@ -38,19 +39,42 @@
 // RUN: FileCheck %s < %t/GlobalNamespace/index.md -check-prefix=MD-GLOBAL-INDEX
 // RUN: FileCheck %s < %t/all_files.md -check-prefix=MD-ALL-FILES
 // RUN: FileCheck %s < %t/index.md -check-prefix=MD-INDEX
+// RUN: FileCheck %s < %t/html/@nonymous_namespace/_ZTVN12_GLOBAL__N_19AnonClassE.html -check-prefix=MUSTACHE-ANON-CLASS-LINE
+// RUN: FileCheck %s < %t/html/@nonymous_namespace/_ZTVN12_GLOBAL__N_19AnonClassE.html -check-prefix=MUSTACHE-ANON-CLASS
+// RUN: FileCheck %s < %t/html/@nonymous_namespace/index.html -check-prefix=MUSTACHE-ANON-INDEX-LINE
+// RUN: FileCheck %s < %t/html/@nonymous_namespace/index.html -check-prefix=MUSTACHE-ANON-INDEX
+// RUN: FileCheck %s < %t/html/AnotherNamespace/_ZTVN16AnotherNamespace23ClassInAnotherNamespaceE.html -check-prefix=MUSTACHE-ANOTHER-CLASS-LINE
+// RUN: FileCheck %s < %t/html/AnotherNamespace/_ZTVN16AnotherNamespace23ClassInAnotherNamespaceE.html -check-prefix=MUSTACHE-ANOTHER-CLASS
+// RUN: FileCheck %s < %t/html/AnotherNamespace/index.html -check-prefix=MUSTACHE-ANOTHER-INDEX-LINE
+// RUN: FileCheck %s < %t/html/AnotherNamespace/index.html -check-prefix=MUSTACHE-ANOTHER-INDEX
+// RUN: FileCheck %s < %t/html/PrimaryNamespace/NestedNamespace/_ZTVN16PrimaryNamespace15NestedNamespace22ClassInNestedNamespaceE.html -check-prefix=MUSTACHE-NESTED-CLASS-LINE
+// RUN: FileCheck %s < %t/html/PrimaryNamespace/NestedNamespace/_ZTVN16PrimaryNamespace15NestedNamespace22ClassInNestedNamespaceE.html -check-prefix=MUSTACHE-NESTED-CLASS
+// RUN: FileCheck %s < %t/html/PrimaryNamespace/NestedNamespace/index.html -check-prefix=MUSTACHE-NESTED-INDEX-LINE
+// RUN: FileCheck %s < %t/html/PrimaryNamespace/NestedNamespace/index.html -check-prefix=MUSTACHE-NESTED-INDEX
+// RUN: FileCheck %s < %t/html/PrimaryNamespace/index.html -check-prefix=MUSTACHE-PRIMARY-INDEX-LINE
+// RUN: FileCheck %s < %t/html/PrimaryNamespace/index.html -check-prefix=MUSTACHE-PRIMARY-INDEX
+// RUN: FileCheck %s < %t/html/PrimaryNamespace/_ZTVN16PrimaryNamespace23ClassInPrimaryNamespaceE.html -check-prefix=MUSTACHE-PRIMARY-CLASS-LINE
+// RUN: FileCheck %s < %t/html/PrimaryNamespace/_ZTVN16PrimaryNamespace23ClassInPrimaryNamespaceE.html -check-prefix=MUSTACHE-PRIMARY-CLASS
+
+// COM: FIXME: Add global functions to the namespace template
+// COM: FIXME: Add namespaces to the namespace template
+// COM: FIXME: Add class definition location to class template
 
 // Anonymous Namespace
 namespace {
 void anonFunction() {}
 // MD-ANON-INDEX-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANON-INDEX-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-ANON-INDEX-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 class AnonClass {};
 // MD-ANON-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANON-CLASS-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-ANON-CLASS-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // MD-ANON-CLASS: # class AnonClass
 // HTML-ANON-CLASS: <h1>class AnonClass</h1>
+// MUSTACHE-ANON-CLASS: <h1 class="hero__title-large">class AnonClass</h1>
 } // namespace
 
 // MD-ANON-INDEX: # namespace @nonymous_namespace
@@ -69,17 +93,31 @@ class AnonClass {};
 // HTML-ANON-INDEX: <h3 id="{{([0-9A-F]{40})}}">anonFunction</h3>
 // HTML-ANON-INDEX: <p>void anonFunction()</p>
 
+// MUSTACHE-ANON-INDEX: <h2> @nonymous_namespace</h2>
+// MUSTACHE-ANON-INDEX:     <h2>Inner Classes</h2>
+// MUSTACHE-ANON-INDEX:         <ul class="class-container">
+// MUSTACHE-ANON-INDEX:             <li id="{{([0-9A-F]{40})}}" style="max-height: 40px;">
+// MUSTACHE-ANON-INDEX:                 <a href="_ZTVN12_GLOBAL__N_19AnonClassE.html">
+// MUSTACHE-ANON-INDEX:                     <pre><code class="language-cpp code-clang-doc">class AnonClass</code></pre>
+// MUSTACHE-ANON-INDEX:                 </a>
+// MUSTACHE-ANON-INDEX:             </li>
+// MUSTACHE-ANON-INDEX-NOT: <h2 id="Functions">Functions</h2>
+// MUSTACHE-ANON-INDEX-NOT: <h3 id="{{([0-9A-F]{40})}}">anonFunction</h3>
+// MUSTACHE-ANON-INDEX-NOT: <p>void anonFunction()</p>
+
 // Primary Namespace
 namespace PrimaryNamespace {
 // Function in PrimaryNamespace
 void functionInPrimaryNamespace() {}
 // MD-PRIMARY-INDEX-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-PRIMARY-INDEX-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-PRIMARY-INDEX-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // Class in PrimaryNamespace
 class ClassInPrimaryNamespace {};
 // MD-PRIMARY-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-PRIMARY-CLASS-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-PRIMARY-CLASS-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // MD-PRIMARY-CLASS: # class ClassInPrimaryNamespace
 // MD-PRIMARY-CLASS: Class in PrimaryNamespace
@@ -87,23 +125,29 @@ class ClassInPrimaryNamespace {};
 // HTML-PRIMARY-CLASS: <h1>class ClassInPrimaryNamespace</h1>
 // HTML-PRIMARY-CLASS: <p> Class in PrimaryNamespace</p>
 
+// MUSTACHE-PRIMARY-CLASS: <h1 class="hero__title-large">class ClassInPrimaryNamespace</h1>
+
 // Nested namespace
 namespace NestedNamespace {
 // Function in NestedNamespace
 void functionInNestedNamespace() {}
 // MD-NESTED-INDEX-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-NESTED-INDEX-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-NESTED-INDEX-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // Class in NestedNamespace
 class ClassInNestedNamespace {};
 // MD-NESTED-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-NESTED-CLASS-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-NESTED-CLASS-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // MD-NESTED-CLASS: # class ClassInNestedNamespace
 // MD-NESTED-CLASS: Class in NestedNamespace
 
 // HTML-NESTED-CLASS: <h1>class ClassInNestedNamespace</h1>
 // HTML-NESTED-CLASS: <p> Class in NestedNamespace</p>
+
+// MUSTACHE-NESTED-CLASS: <h1 class="hero__title-large">class ClassInNestedNamespace</h1>
 } // namespace NestedNamespace
 
 // MD-NESTED-INDEX: # namespace NestedNamespace
@@ -123,6 +167,20 @@ class ClassInNestedNamespace {};
 // HTML-NESTED-INDEX: <h3 id="{{([0-9A-F]{40})}}">functionInNestedNamespace</h3>
 // HTML-NESTED-INDEX: <p>void functionInNestedNamespace()</p>
 // HTML-NESTED-INDEX: <p> Function in NestedNamespace</p>
+
+// MUSTACHE-NESTED-INDEX: <h2> NestedNamespace</h2>
+// MUSTACHE-NESTED-INDEX:     <h2>Inner Classes</h2>
+// MUSTACHE-NESTED-INDEX:     <ul class="class-container">
+// MUSTACHE-NESTED-INDEX:         <li id="{{([0-9A-F]{40})}}" style="max-height: 40px;">
+// MUSTACHE-NESTED-INDEX:             <a href="_ZTVN16PrimaryNamespace15NestedNamespace22ClassInNestedNamespaceE.html">
+// MUSTACHE-NESTED-INDEX:                 <pre><code class="language-cpp code-clang-doc">class ClassInNestedNamespace</code></pre>
+// MUSTACHE-NESTED-INDEX:             </a>
+// MUSTACHE-NESTED-INDEX:         </li>
+// MUSTACHE-NESTED-INDEX:     </ul>
+// MUSTACHE-NESTED-INDEX-NOT: <h2 id="Functions">Functions</h2>
+// MUSTACHE-NESTED-INDEX-NOT: <h3 id="{{([0-9A-F]{40})}}">functionInNestedNamespace</h3>
+// MUSTACHE-NESTED-INDEX-NOT: <p>void functionInNestedNamespace()</p>
+// MUSTACHE-NESTED-INDEX-NOT: <p> Function in NestedNamespace</p>
 } // namespace PrimaryNamespace
 
 // MD-PRIMARY-INDEX: # namespace PrimaryNamespace
@@ -147,17 +205,35 @@ class ClassInNestedNamespace {};
 // HTML-PRIMARY-INDEX: <p>void functionInPrimaryNamespace()</p>
 // HTML-PRIMARY-INDEX: <p> Function in PrimaryNamespace</p>
 
+// MUSTACHE-PRIMARY-INDEX: <h2> PrimaryNamespace</h2>
+// MUSTACHE-PRIMARY-INDEX-NOT: <h2 id="Namespaces">Namespaces</h2>
+// MUSTACHE-PRIMARY-INDEX-NOT: <a href="NestedNamespace{{[\/]}}index.html">NestedNamespace</a>
+// MUSTACHE-PRIMARY-INDEX:     <h2>Inner Classes</h2>
+// MUSTACHE-PRIMARY-INDEX:         <ul class="class-container">
+// MUSTACHE-PRIMARY-INDEX:             <li id="{{([0-9A-F]{40})}}" style="max-height: 40px;">
+// MUSTACHE-PRIMARY-INDEX:                 <a href="_ZTVN16PrimaryNamespace23ClassInPrimaryNamespaceE.html">
+// MUSTACHE-PRIMARY-INDEX:                     <pre><code class="language-cpp code-clang-doc">class ClassInPrimaryNamespace</code></pre>
+// MUSTACHE-PRIMARY-INDEX:                 </a>
+// MUSTACHE-PRIMARY-INDEX:             </li>
+// MUSTACHE-PRIMARY-INDEX:         </ul>
+// MUSTACHE-PRIMARY-INDEX-NOT: <h2 id="Functions">Functions</h2>
+// MUSTACHE-PRIMARY-INDEX-NOT: <h3 id="{{([0-9A-F]{40})}}">functionInPrimaryNamespace</h3>
+// MUSTACHE-PRIMARY-INDEX-NOT: <p>void functionInPrimaryNamespace()</p>
+// MUSTACHE-PRIMARY-INDEX-NOT: <p> Function in PrimaryNamespace</p>
+
 // AnotherNamespace
 namespace AnotherNamespace {
 // Function in AnotherNamespace
 void functionInAnotherNamespace() {}
 // MD-ANOTHER-INDEX-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANOTHER-INDEX-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-ANOTHER-INDEX-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // Class in AnotherNamespace
 class ClassInAnotherNamespace {};
 // MD-ANOTHER-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANOTHER-CLASS-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-ANOTHER-CLASS-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // MD-ANOTHER-CLASS: # class ClassInAnotherNamespace
 // MD-ANOTHER-CLASS:  Class in AnotherNamespace
@@ -165,6 +241,8 @@ class ClassInAnotherNamespace {};
 // HTML-ANOTHER-CLASS: <h1>class ClassInAnotherNamespace</h1>
 // HTML-ANOTHER-CLASS: <p> Class in AnotherNamespace</p>
 
+// MUSTACHE-ANOTHER-CLASS: <h1 class="hero__title-large">class ClassInAnotherNamespace</h1>
+
 } // namespace AnotherNamespace
 
 // MD-ANOTHER-INDEX: # namespace AnotherNamespace
@@ -185,6 +263,20 @@ class ClassInAnotherNamespace {};
 // HTML-ANOTHER-INDEX: <p>void functionInAnotherNamespace()</p>
 // HTML-ANOTHER-INDEX: <p> Function in AnotherNamespace</p>
 
+// MUSTACHE-ANOTHER-INDEX: <h2> AnotherNamespace</h2>
+// MUSTACHE-ANOTHER-INDEX:     <h2>Inner Classes</h2>
+// MUSTACHE-ANOTHER-INDEX:     <ul class="class-container">
+// MUSTACHE-ANOTHER-INDEX:         <li id="{{([0-9A-F]{40})}}" style="max-height: 40px;">
+// MUSTACHE-ANOTHER-INDEX:             <a href="_ZTVN16AnotherNamespace23ClassInAnotherNamespaceE.html">
+// MUSTACHE-ANOTHER-INDEX:                 <pre><code class="language-cpp code-clang-doc">class ClassInAnotherNamespace</code></pre>
+// MUSTACHE-ANOTHER-INDEX:             </a>
+// MUSTACHE-ANOTHER-INDEX:         </li>
+// MUSTACHE-ANOTHER-INDEX:     </ul>
+// MUSTACHE-ANOTHER-INDEX-NOT: <h2 id="Functions">Functions</h2>
+// MUSTACHE-ANOTHER-INDEX-NOT: <h3 id="{{([0-9A-F]{40})}}">functionInAnotherNamespace</h3>
+// MUSTACHE-ANOTHER-INDEX-NOT: <p>void functionInAnotherNamespace()</p>
+// MUSTACHE-ANOTHER-INDEX-NOT: <p> Function in AnotherNamespace</p>
+
 // JSON-INDEX: async function LoadIndex() {
 // JSON-INDEX-NEXT: return{
 // JSON-INDEX-NEXT:   "USR": "{{([0-9A-F]{40})}}",
@@ -270,6 +362,13 @@ class ClassInAnotherNamespace {};
 // HTML-GLOBAL-INDEX: <li>AnotherNamespace</li>
 // HTML-GLOBAL-INDEX: <li>PrimaryNamespace</li>
 
+// MUSTACHE-GLOBAL-INDEX: <div id="main-content" class="col-xs-12 col-sm-9 col-md-8 main-content">
+// MUSTACHE-GLOBAL-INDEX: <h1>Global Namespace</h1>
+// MUSTACHE-GLOBAL-INDEX: <h2 id="Namespaces">Namespaces</h2>
+// MUSTACHE-GLOBAL-INDEX: <li>@nonymous_namespace</li>
+// MUSTACHE-GLOBAL-INDEX: <li>AnotherNamespace</li>
+// MUSTACHE-GLOBAL-INDEX: <li>PrimaryNamespace</li>
+
 // MD-GLOBAL-INDEX: # Global Namespace
 // MD-GLOBAL-INDEX: ## Namespaces
 // MD-GLOBAL-INDEX: * [@nonymous_namespace](..{{[\/]}}@nonymous_namespace{{[\/]}}index.md)

From 37f7b3128d8217e6a99cc6117ea709e8fa7b0704 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra@codasip.com>
Date: Mon, 24 Nov 2025 18:11:58 +0000
Subject: [PATCH 23/37] Reland [VPlan] Handle WidenGEP in narrowToSingleScalars
 (#167880)

Changes: Fix a missed update to WidenGEP::usesFirstLaneOnly, and include a
reduced test case that was previously hitting the new assert. The
underlying reason was that VPWidenGEP::usesScalars was too weak, and the
single-scalar WidenGEP was not narrowed by narrowToSingleScalarRecipes.

This allows us to strip a special case in VPWidenGEP::execute.
---
 llvm/lib/Transforms/Vectorize/VPlan.h         | 15 +---
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 76 ++++++++-----------
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  3 +-
 .../RISCV/gather-scatter-cost.ll              | 14 ++--
 ...row-to-single-scalar-widen-gep-scalable.ll | 60 +++++++++++++++
 .../widen-gep-all-indices-invariant.ll        | 12 +--
 6 files changed, 107 insertions(+), 73 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8a435accfedfe..0c7d9c0193a03 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1854,12 +1854,6 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
     return getOperand(I + 1)->isDefinedOutsideLoopRegions();
   }
 
-  bool areAllOperandsInvariant() const {
-    return all_of(operands(), [](VPValue *Op) {
-      return Op->isDefinedOutsideLoopRegions();
-    });
-  }
-
 public:
   VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands,
                    const VPIRFlags &Flags = {},
@@ -1898,14 +1892,7 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
   }
 
   /// Returns true if the recipe only uses the first lane of operand \p Op.
-  bool usesFirstLaneOnly(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    if (Op == getOperand(0))
-      return isPointerLoopInvariant();
-    else
-      return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
-  }
+  bool usesFirstLaneOnly(const VPValue *Op) const override;
 
 protected:
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 5ea9dd349e06f..54fdec3bcf4a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2536,6 +2536,11 @@ void VPScalarIVStepsRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+bool VPWidenGEPRecipe::usesFirstLaneOnly(const VPValue *Op) const {
+  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
+  return vputils::isSingleScalar(Op);
+}
+
 void VPWidenGEPRecipe::execute(VPTransformState &State) {
   assert(State.VF.isVector() && "not widening");
   // Construct a vector GEP by widening the operands of the scalar GEP as
@@ -2544,51 +2549,32 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
   // is vector-typed. Thus, to keep the representation compact, we only use
   // vector-typed operands for loop-varying values.
 
-  if (areAllOperandsInvariant()) {
-    // If we are vectorizing, but the GEP has only loop-invariant operands,
-    // the GEP we build (by only using vector-typed operands for
-    // loop-varying values) would be a scalar pointer. Thus, to ensure we
-    // produce a vector of pointers, we need to either arbitrarily pick an
-    // operand to broadcast, or broadcast a clone of the original GEP.
-    // Here, we broadcast a clone of the original.
-    //
-    // TODO: If at some point we decide to scalarize instructions having
-    //       loop-invariant operands, this special case will no longer be
-    //       required. We would add the scalarization decision to
-    //       collectLoopScalars() and teach getVectorValue() to broadcast
-    //       the lane-zero scalar value.
-    SmallVector<Value *> Ops;
-    for (unsigned I = 0, E = getNumOperands(); I != E; I++)
-      Ops.push_back(State.get(getOperand(I), VPLane(0)));
-
-    auto *NewGEP =
-        State.Builder.CreateGEP(getSourceElementType(), Ops[0], drop_begin(Ops),
-                                "", getGEPNoWrapFlags());
-    Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
-    State.set(this, Splat);
-  } else {
-    // If the GEP has at least one loop-varying operand, we are sure to
-    // produce a vector of pointers unless VF is scalar.
-    // The pointer operand of the new GEP. If it's loop-invariant, we
-    // won't broadcast it.
-    auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant());
-
-    // Collect all the indices for the new GEP. If any index is
-    // loop-invariant, we won't broadcast it.
-    SmallVector<Value *, 4> Indices;
-    for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
-      VPValue *Operand = getOperand(I);
-      Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1)));
-    }
-
-    // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
-    // but it should be a vector, otherwise.
-    auto *NewGEP = State.Builder.CreateGEP(getSourceElementType(), Ptr, Indices,
-                                           "", getGEPNoWrapFlags());
-    assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
-           "NewGEP is not a pointer vector");
-    State.set(this, NewGEP);
-  }
+  assert(
+      any_of(operands(),
+             [](VPValue *Op) { return !Op->isDefinedOutsideLoopRegions(); }) &&
+      "Expected at least one loop-variant operand");
+
+  // If the GEP has at least one loop-varying operand, we are sure to
+  // produce a vector of pointers unless VF is scalar.
+  // The pointer operand of the new GEP. If it's loop-invariant, we
+  // won't broadcast it.
+  auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant());
+
+  // Collect all the indices for the new GEP. If any index is
+  // loop-invariant, we won't broadcast it.
+  SmallVector<Value *, 4> Indices;
+  for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
+    VPValue *Operand = getOperand(I);
+    Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1)));
+  }
+
+  // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
+  // but it should be a vector, otherwise.
+  auto *NewGEP = State.Builder.CreateGEP(getSourceElementType(), Ptr, Indices,
+                                         "", getGEPNoWrapFlags());
+  assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
+         "NewGEP is not a pointer vector");
+  State.set(this, NewGEP);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e7a8773be067b..89b490e960f33 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1451,7 +1451,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
     for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
-      if (!isa<VPWidenRecipe, VPWidenSelectRecipe, VPReplicateRecipe>(&R))
+      if (!isa<VPWidenRecipe, VPWidenSelectRecipe, VPWidenGEPRecipe,
+               VPReplicateRecipe>(&R))
         continue;
       auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
       if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
index 212a5c99676f4..877484f5159fd 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
@@ -63,7 +63,7 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) {
 ; CHECK-NEXT:    store i32 [[STORE]], ptr [[NBRBOXES]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp sgt i32 [[IV]], [[IBOX]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
@@ -114,7 +114,7 @@ define void @predicated_strided_store(ptr %start) {
 ; RVA23-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]]
 ; RVA23-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; RVA23-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RVA23-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; RVA23-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; RVA23:       middle.block:
 ; RVA23-NEXT:    br label [[LOOP:%.*]]
 ; RVA23:       exit:
@@ -141,7 +141,7 @@ define void @predicated_strided_store(ptr %start) {
 ; RVA23ZVL1024B-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]]
 ; RVA23ZVL1024B-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; RVA23ZVL1024B-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RVA23ZVL1024B-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; RVA23ZVL1024B-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; RVA23ZVL1024B:       middle.block:
 ; RVA23ZVL1024B-NEXT:    br label [[LOOP:%.*]]
 ; RVA23ZVL1024B:       exit:
@@ -185,16 +185,16 @@ define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr no
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], <vscale x 2 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT1]], <vscale x 2 x ptr> align 8 [[TMP5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[P3:%.*]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP8]], i64 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
 ; CHECK-NEXT:    call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
 ; CHECK-NEXT:    call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> align 1 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
 ; CHECK-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]]
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       exit:
diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll
new file mode 100644
index 0000000000000..6746e92cc1fd1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6
+; RUN: opt -p loop-vectorize -force-vector-width=2 \
+; RUN:  -force-target-supports-scalable-vectors=true \
+; RUN:  -scalable-vectorization=preferred -S %s | FileCheck %s
+
+define void @widengep_narrow(ptr %in, ptr noalias %p) {
+; CHECK-LABEL: define void @widengep_narrow(
+; CHECK-SAME: ptr [[IN:%.*]], ptr noalias [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[IN]], i64 8
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP4]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], <vscale x 2 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 [[TMP10]]
+; CHECK-NEXT:    store ptr [[TMP11]], ptr [[P]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.in.off = getelementptr i8, ptr %in, i64 8
+  %gep.in.iv = getelementptr i32, ptr %gep.in.off, i64 %iv
+  store ptr %gep.in.iv, ptr %p
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 1024
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll
index 9bb010c0431d8..90ef97609e096 100644
--- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll
+++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll
@@ -8,14 +8,14 @@ define void @pr63340(ptr %A, ptr %B) {
 ; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 1
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[OFFSET_IDX]]
-; CHECK-NEXT:    store <4 x ptr> [[DOTSPLAT]], ptr [[TMP1]], align 8
+; CHECK-NEXT:    store <4 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -55,11 +55,11 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[SRC]], align 8
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[N]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, <4 x ptr> [[BROADCAST_SPLAT]], i64 [[N]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT:    store <4 x ptr> [[TMP1]], ptr [[TMP2]], align 8
+; CHECK-NEXT:    store <4 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]

From 9688f88e57f369002157758b8399a235bf6763ca Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra@codasip.com>
Date: Mon, 24 Nov 2025 18:12:26 +0000
Subject: [PATCH 24/37] [LV] Pre-commit test for #128062 (#164801)

In preparation to extend the work done by dfa665f ([VPlan] Add
transformation to narrow interleave groups) to make the narrowing more
powerful, pre-commit a test case from #128062.
---
 ...28062-interleaved-accesses-narrow-group.ll | 201 ++++++++++++++++++
 1 file changed, 201 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll

diff --git a/llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll b/llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll
new file mode 100644
index 0000000000000..00eeb69dcb0f7
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-interleaved-mem-accesses -S | FileCheck %s
+
+define void @pr128062(ptr %dst.start, i8 %a, i16 %b) {
+; CHECK-LABEL: define void @pr128062(
+; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT:    [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255)
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[STRIDED_VEC3]] to <4 x i16>
+; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255)
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8>
+; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16>
+; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255)
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8>
+; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16>
+; CHECK-NEXT:    [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255)
+; CHECK-NEXT:    [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8>
+; CHECK-NEXT:    [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT:    store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; CHECK-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %dst = phi ptr [ %dst.start, %entry ], [ %dst.next, %loop ]
+  %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4
+  %load.dst = load i8, ptr %dst, align 1
+  %dst.ext = zext i8 %load.dst to i16
+  %mul.dst.0 = mul nuw i16 %dst.ext, %b
+  %udiv.0 = udiv i16 %mul.dst.0, 255
+  %trunc.0 = trunc nuw i16 %udiv.0 to i8
+  %val.0 = add i8 %a, %trunc.0
+  store i8 %val.0, ptr %dst, align 1
+  %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1
+  %load.dst.1 = load i8, ptr %gep.dst.1, align 1
+  %dst.1.ext = zext i8 %load.dst.1 to i16
+  %mul.dst.1 = mul nuw i16 %dst.1.ext, %b
+  %udiv.1 = udiv i16 %mul.dst.1, 255
+  %trunc.1 = trunc nuw i16 %udiv.1 to i8
+  %val.1 = add i8 %a, %trunc.1
+  store i8 %val.1, ptr %gep.dst.1, align 1
+  %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2
+  %load.dst.2 = load i8, ptr %gep.dst.2, align 1
+  %dst.2.ext = zext i8 %load.dst.2 to i16
+  %mul.dst.2 = mul nuw i16 %dst.2.ext, %b
+  %udiv.2 = udiv i16 %mul.dst.2, 255
+  %trunc.2 = trunc nuw i16 %udiv.2 to i8
+  %val.2 = add i8 %a, %trunc.2
+  store i8 %val.2, ptr %gep.dst.2, align 1
+  %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3
+  %load.dst.3 = load i8, ptr %gep.dst.3, align 1
+  %dst.3.ext = zext i8 %load.dst.3 to i16
+  %mul.dst.3 = mul nuw i16 %dst.3.ext, %b
+  %udiv.3 = udiv i16 %mul.dst.3, 255
+  %trunc.3 = trunc nuw i16 %udiv.3 to i8
+  %val.3 = add i8 %a, %trunc.3
+  store i8 %val.3, ptr %gep.dst.3, align 1
+  %iv.next = add i64 %iv, 4
+  %exit.cond = icmp eq i64 %iv.next, 256
+  br i1 %exit.cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Same as above, except one zext is replaced with an sext.
+define void @opcode_mismatch(ptr %dst.start, i8 %a, i16 %b) {
+; CHECK-LABEL: define void @opcode_mismatch(
+; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT:    [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255)
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i8> [[STRIDED_VEC3]] to <4 x i16>
+; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255)
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8>
+; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16>
+; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255)
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8>
+; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16>
+; CHECK-NEXT:    [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255)
+; CHECK-NEXT:    [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8>
+; CHECK-NEXT:    [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT:    store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; CHECK-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %dst = phi ptr [ %dst.start, %entry ], [ %dst.next, %loop ]
+  %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4
+  %load.dst = load i8, ptr %dst, align 1
+  %dst.ext = zext i8 %load.dst to i16
+  %mul.dst.0 = mul nuw i16 %dst.ext, %b
+  %udiv.0 = udiv i16 %mul.dst.0, 255
+  %trunc.0 = trunc nuw i16 %udiv.0 to i8
+  %val.0 = add i8 %a, %trunc.0
+  store i8 %val.0, ptr %dst, align 1
+  %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1
+  %load.dst.1 = load i8, ptr %gep.dst.1, align 1
+  %dst.1.ext = sext i8 %load.dst.1 to i16
+  %mul.dst.1 = mul nuw i16 %dst.1.ext, %b
+  %udiv.1 = udiv i16 %mul.dst.1, 255
+  %trunc.1 = trunc nuw i16 %udiv.1 to i8
+  %val.1 = add i8 %a, %trunc.1
+  store i8 %val.1, ptr %gep.dst.1, align 1
+  %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2
+  %load.dst.2 = load i8, ptr %gep.dst.2, align 1
+  %dst.2.ext = zext i8 %load.dst.2 to i16
+  %mul.dst.2 = mul nuw i16 %dst.2.ext, %b
+  %udiv.2 = udiv i16 %mul.dst.2, 255
+  %trunc.2 = trunc nuw i16 %udiv.2 to i8
+  %val.2 = add i8 %a, %trunc.2
+  store i8 %val.2, ptr %gep.dst.2, align 1
+  %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3
+  %load.dst.3 = load i8, ptr %gep.dst.3, align 1
+  %dst.3.ext = zext i8 %load.dst.3 to i16
+  %mul.dst.3 = mul nuw i16 %dst.3.ext, %b
+  %udiv.3 = udiv i16 %mul.dst.3, 255
+  %trunc.3 = trunc nuw i16 %udiv.3 to i8
+  %val.3 = add i8 %a, %trunc.3
+  store i8 %val.3, ptr %gep.dst.3, align 1
+  %iv.next = add i64 %iv, 4
+  %exit.cond = icmp eq i64 %iv.next, 256
+  br i1 %exit.cond, label %exit, label %loop
+
+exit:
+  ret void
+}

From 621cbcde0161341494b546a1fb478cfd57d1a94f Mon Sep 17 00:00:00 2001
From: Atmn Patel <atmnp@nvidia.com>
Date: Mon, 24 Nov 2025 12:15:00 -0600
Subject: [PATCH 25/37] [mlir][acc] Adds attr to acc.present to identify
 default clause origin (#169114)

The `acc.present` Op as generated by ACCImplicitData does not provide a
way to differentiate between `acc.present` ops that are generated
implicitly and the ones that are generated as a result of an explicit
`default(present)` clause in the source code. This differentiation would
allow for better communication to the user on the decisions made by the
compiler while managing data automatically between the host and the
device. This commit adds this information as a discardable attribute on
the `acc.present` op.
---
 flang/test/Transforms/OpenACC/acc-implicit-data.fir     | 8 ++++----
 mlir/include/mlir/Dialect/OpenACC/OpenACC.h             | 4 ++++
 mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp | 2 ++
 mlir/test/Dialect/OpenACC/acc-implicit-data.mlir        | 2 +-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/flang/test/Transforms/OpenACC/acc-implicit-data.fir b/flang/test/Transforms/OpenACC/acc-implicit-data.fir
index 7f6a57cb4d8c6..2d28c341d0d5e 100644
--- a/flang/test/Transforms/OpenACC/acc-implicit-data.fir
+++ b/flang/test/Transforms/OpenACC/acc-implicit-data.fir
@@ -133,7 +133,7 @@ func.func @test_fir_derivedtype_in_parallel_defaultpresent() {
   return
 }
 
-// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {implicit = true, name = "aggrvar"}
+// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {acc.from_default, implicit = true, name = "aggrvar"}
 // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "aggrvar"}
 
 // -----
@@ -147,7 +147,7 @@ func.func @test_fir_derivedtype_in_kernels_defaultpresent() {
   return
 }
 
-// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {implicit = true, name = "aggrvar"}
+// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {acc.from_default, implicit = true, name = "aggrvar"}
 // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "aggrvar"}
 
 // -----
@@ -161,7 +161,7 @@ func.func @test_fir_array_in_parallel_defaultpresent() {
   return
 }
 
-// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {implicit = true, name = "arrayvar"}
+// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {acc.from_default, implicit = true, name = "arrayvar"}
 // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "arrayvar"}
 
 // -----
@@ -175,7 +175,7 @@ func.func @test_fir_array_in_kernels_defaultpresent() {
   return
 }
 
-// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {implicit = true, name = "arrayvar"}
+// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {acc.from_default, implicit = true, name = "arrayvar"}
 // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "arrayvar"}
 
 // -----
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
index 05d2316711c8a..601fc1a594768 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
@@ -177,6 +177,10 @@ static constexpr StringLiteral getRoutineInfoAttrName() {
   return StringLiteral("acc.routine_info");
 }
 
+static constexpr StringLiteral getFromDefaultClauseAttrName() {
+  return StringLiteral("acc.from_default");
+}
+
 static constexpr StringLiteral getVarNameAttrName() {
   return VarNameAttr::name;
 }
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
index 91262bd76ca31..7d729619b3f21 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
@@ -570,6 +570,8 @@ Operation *ACCImplicitData::generateDataClauseOpForCandidate(
       newDataOp = acc::PresentOp::create(builder, loc, var,
                                          /*structured=*/true, /*implicit=*/true,
                                          accSupport.getVariableName(var));
+      newDataOp->setAttr(acc::getFromDefaultClauseAttrName(),
+                         builder.getUnitAttr());
     } else {
       auto copyinOp =
           acc::CopyinOp::create(builder, loc, var,
diff --git a/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir
index cf09c33ca5197..06c1c3cadd4ba 100644
--- a/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir
+++ b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir
@@ -110,7 +110,7 @@ func.func @test_array_parallel_defaultpresent() {
 }
 
 // CHECK-LABEL: func.func @test_array_parallel_defaultpresent
-// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {implicit = true, name = ""}
+// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {acc.from_default, implicit = true, name = ""}
 // CHECK: acc.delete accPtr(%[[PRESENT]] : memref<10xf32>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = ""}
 
 // -----

From a27bb38ee6f5762e715803d8eb6ffc5a8dd09575 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi@nvidia.com>
Date: Mon, 24 Nov 2025 10:19:15 -0800
Subject: [PATCH 26/37] Reapply "[NFC][bugpoint] Namespace cleanup in
 `bugpoint`" (#168961) (#169055)

This reverts commit b83e458fe5330227581e1e65f3866ddfcd597837.

Also undo the use of namespace qualifier for `ReducePassList` as that
seems to cause build failures.
---
 llvm/tools/bugpoint/BugDriver.h         |  5 +++++
 llvm/tools/bugpoint/ExecutionDriver.cpp | 17 +++++++----------
 llvm/tools/bugpoint/ExtractFunction.cpp |  3 ---
 llvm/tools/bugpoint/Miscompilation.cpp  |  5 -----
 llvm/tools/bugpoint/OptimizerDriver.cpp |  4 ----
 5 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/llvm/tools/bugpoint/BugDriver.h b/llvm/tools/bugpoint/BugDriver.h
index ca57405f9d770..71a5aa14bbb2e 100644
--- a/llvm/tools/bugpoint/BugDriver.h
+++ b/llvm/tools/bugpoint/BugDriver.h
@@ -16,6 +16,7 @@
 #define LLVM_TOOLS_BUGPOINT_BUGDRIVER_H
 
 #include "llvm/IR/ValueMap.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
@@ -41,6 +42,10 @@ extern bool DisableSimplifyCFG;
 ///
 extern bool BugpointIsInterrupted;
 
+/// Command line options used across files.
+extern cl::list<std::string> InputArgv;
+extern cl::opt<std::string> OutputPrefix;
+
 class BugDriver {
   LLVMContext &Context;
   const char *ToolName;            // argv[0] of bugpoint
diff --git a/llvm/tools/bugpoint/ExecutionDriver.cpp b/llvm/tools/bugpoint/ExecutionDriver.cpp
index 8c6b7fbe50c7c..96eeb35b4db70 100644
--- a/llvm/tools/bugpoint/ExecutionDriver.cpp
+++ b/llvm/tools/bugpoint/ExecutionDriver.cpp
@@ -13,7 +13,6 @@
 
 #include "BugDriver.h"
 #include "ToolRunner.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/Program.h"
@@ -102,15 +101,13 @@ static cl::opt<std::string> CustomExecCommand(
 
 // Anything specified after the --args option are taken as arguments to the
 // program being debugged.
-namespace llvm {
-cl::list<std::string> InputArgv("args", cl::Positional,
-                                cl::desc("<program arguments>..."),
-                                cl::PositionalEatsArgs);
-
-cl::opt<std::string>
-    OutputPrefix("output-prefix", cl::init("bugpoint"),
-                 cl::desc("Prefix to use for outputs (default: 'bugpoint')"));
-} // namespace llvm
+cl::list<std::string> llvm::InputArgv("args", cl::Positional,
+                                      cl::desc("<program arguments>..."),
+                                      cl::PositionalEatsArgs);
+
+cl::opt<std::string> llvm::OutputPrefix(
+    "output-prefix", cl::init("bugpoint"),
+    cl::desc("Prefix to use for outputs (default: 'bugpoint')"));
 
 static cl::list<std::string> ToolArgv("tool-args", cl::Positional,
                                       cl::desc("<tool arguments>..."),
diff --git a/llvm/tools/bugpoint/ExtractFunction.cpp b/llvm/tools/bugpoint/ExtractFunction.cpp
index 3206589ff38f2..31cdd0d43f2fc 100644
--- a/llvm/tools/bugpoint/ExtractFunction.cpp
+++ b/llvm/tools/bugpoint/ExtractFunction.cpp
@@ -36,9 +36,6 @@ using namespace llvm;
 #define DEBUG_TYPE "bugpoint"
 
 bool llvm::DisableSimplifyCFG = false;
-namespace llvm {
-extern cl::opt<std::string> OutputPrefix;
-} // namespace llvm
 
 static cl::opt<bool>
     NoDCE("disable-dce",
diff --git a/llvm/tools/bugpoint/Miscompilation.cpp b/llvm/tools/bugpoint/Miscompilation.cpp
index a7f1643aecf15..dcad126d87865 100644
--- a/llvm/tools/bugpoint/Miscompilation.cpp
+++ b/llvm/tools/bugpoint/Miscompilation.cpp
@@ -28,11 +28,6 @@
 
 using namespace llvm;
 
-namespace llvm {
-extern cl::opt<std::string> OutputPrefix;
-extern cl::list<std::string> InputArgv;
-} // end namespace llvm
-
 static cl::opt<bool> DisableLoopExtraction(
     "disable-loop-extraction",
     cl::desc("Don't extract loops when searching for miscompilations"),
diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp
index bf2e8c0b4a910..191f87c08a0f6 100644
--- a/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -34,10 +34,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "bugpoint"
 
-namespace llvm {
-extern cl::opt<std::string> OutputPrefix;
-}
-
 static cl::opt<std::string>
     OptCmd("opt-command", cl::init(""),
            cl::desc("Path to opt. (default: search path "

From 1b65752d16045114ed381c95306517ff99147cda Mon Sep 17 00:00:00 2001
From: Erich Keane <ekeane@nvidia.com>
Date: Mon, 24 Nov 2025 10:33:10 -0800
Subject: [PATCH 27/37] [OpenACC][CIR] Implement 'present' lowering on
 local-declare (#169381)

Just like the last handful of patches that did copy, copyin, copyout,
create, etc., this patch has the exact same behavior, except that the
entry op is a present and the exit op is a delete.
---
 clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp   |  10 +-
 clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp |  10 +-
 .../CIR/CodeGenOpenACC/declare-present.cpp    | 199 ++++++++++++++++++
 3 files changed, 212 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenOpenACC/declare-present.cpp

diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
index bf9ec3701e6ea..9c1aeb87c8029 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
@@ -55,8 +55,8 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup {
       if (auto copyin = val.getDefiningOp<mlir::acc::CopyinOp>()) {
         switch (copyin.getDataClause()) {
         default:
-          cgf.cgm.errorNYI(declareRange,
-                           "OpenACC local declare clause copyin cleanup");
+          llvm_unreachable(
+              "OpenACC local declare clause copyin unexpected data clause");
           break;
         case mlir::acc::DataClause::acc_copy:
           createOutOp<mlir::acc::CopyoutOp>(cgf, copyin);
@@ -68,8 +68,8 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup {
       } else if (auto create = val.getDefiningOp<mlir::acc::CreateOp>()) {
         switch (create.getDataClause()) {
         default:
-          cgf.cgm.errorNYI(declareRange,
-                           "OpenACC local declare clause create cleanup");
+          llvm_unreachable(
+              "OpenACC local declare clause create unexpected data clause");
           break;
         case mlir::acc::DataClause::acc_copyout:
           createOutOp<mlir::acc::CopyoutOp>(cgf, create);
@@ -78,6 +78,8 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup {
           createOutOp<mlir::acc::DeleteOp>(cgf, create);
           break;
         }
+      } else if (auto create = val.getDefiningOp<mlir::acc::PresentOp>()) {
+        createOutOp<mlir::acc::DeleteOp>(cgf, create);
       } else if (val.getDefiningOp<mlir::acc::DeclareLinkOp>()) {
         // Link has no exit clauses, and shouldn't be copied.
         continue;
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 3e229d0d76917..a23ec93ab1d75 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -984,12 +984,16 @@ class OpenACCClauseCIREmitter final
         addDataOperand<mlir::acc::PresentOp, mlir::acc::DeleteOp>(
             var, mlir::acc::DataClause::acc_present, {}, /*structured=*/true,
             /*implicit=*/false);
+    } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) {
+      for (const Expr *var : clause.getVarList())
+        addDataOperand<mlir::acc::PresentOp>(
+            var, mlir::acc::DataClause::acc_present, {},
+            /*structured=*/true,
+            /*implicit=*/false);
     } else if constexpr (isCombinedType<OpTy>) {
       applyToComputeOp(clause);
     } else {
-      // TODO: When we've implemented this for everything, switch this to an
-      // unreachable. declare remains.
-      return clauseNotImplemented(clause);
+      llvm_unreachable("Unknown construct kind in VisitPresentClause");
     }
   }
 
diff --git a/clang/test/CIR/CodeGenOpenACC/declare-present.cpp b/clang/test/CIR/CodeGenOpenACC/declare-present.cpp
new file mode 100644
index 0000000000000..c17b9597adf12
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenACC/declare-present.cpp
@@ -0,0 +1,199 @@
+// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
+
+struct HasSideEffects {
+  HasSideEffects();
+  ~HasSideEffects();
+};
+
+// TODO: OpenACC: Implement 'global', NS lowering.
+
+struct Struct {
+  static const HasSideEffects StaticMemHSE;
+  static const HasSideEffects StaticMemHSEArr[5];
+  static const int StaticMemInt;
+
+  // TODO: OpenACC: Implement static-local lowering.
+
+  void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr<!rec_Struct>{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: cir.alloca{{.*}}["this"
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.load
+
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    int LocalInt;
+
+#pragma acc declare present(ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1])
+    // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "ArgInt"} 
+    // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    //
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PRESENT]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_present>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_PRESENT]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_present>, name = "ArgInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_PRESENT]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_present>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_PRESENT]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_present>, name = "LocalInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_PRESENT]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) {dataClause = #acc<data_clause acc_present>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) {dataClause = #acc<data_clause acc_present>, name = "LocalHSEArr[1:1]"}
+  }
+  void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr);
+};
+
+void use() {
+  Struct s;
+  s.MemFunc1(HasSideEffects{}, 0, nullptr);
+}
+
+void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr<!rec_Struct>{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: cir.alloca{{.*}}["this"
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.load
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    // CHECK: do {
+    // CHECK: } while {
+    // CHECK: }
+    int LocalInt;
+#pragma acc declare present(ArgHSE, ArgInt, ArgHSEPtr[1:1])
+    // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "ArgInt"} 
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+
+#pragma acc declare present(LocalHSE, LocalInt, LocalHSEArr[1:1])
+    // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_PRESENT]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_present>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_PRESENT]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_present>, name = "LocalInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) {dataClause = #acc<data_clause acc_present>, name = "LocalHSEArr[1:1]"}
+    //
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PRESENT]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_present>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_PRESENT]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_present>, name = "ArgInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_PRESENT]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) {dataClause = #acc<data_clause acc_present>, name = "ArgHSEPtr[1:1]"}
+}
+
+extern "C" void do_thing();
+
+extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    // CHECK: do {
+    // CHECK: } while {
+    // CHECK: }
+    int LocalInt;
+#pragma acc declare present(ArgHSE, ArgInt, ArgHSEPtr[1:1])
+    // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "ArgInt"} 
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+    {
+      // CHECK-NEXT: cir.scope {
+#pragma acc declare present(LocalHSE, LocalInt, LocalHSEArr[1:1])
+    // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+
+    do_thing();
+    // CHECK-NEXT: cir.call @do_thing
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_PRESENT]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_present>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_PRESENT]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_present>, name = "LocalInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) {dataClause = #acc<data_clause acc_present>, name = "LocalHSEArr[1:1]"}
+    }
+    // CHECK-NEXT: }
+
+    // Make sure that cleanup gets put in the right scope.
+    do_thing();
+    // CHECK-NEXT: cir.call @do_thing
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+ 
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PRESENT]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_present>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_PRESENT]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_present>, name = "ArgInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_PRESENT]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) {dataClause = #acc<data_clause acc_present>, name = "ArgHSEPtr[1:1]"}
+}
+

From 740d0bd385967f6ae0171896722143d9a70b66a5 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Mon, 24 Nov 2025 13:39:15 -0500
Subject: [PATCH 28/37] [MLIR][Python] add GetTypeID for llvm.struct_type and
 llvm.ptr and enable downcasting (#169383)

---
 mlir/include/mlir-c/Dialect/LLVM.h       | 4 ++++
 mlir/lib/Bindings/Python/DialectLLVM.cpp | 7 ++++---
 mlir/lib/CAPI/Dialect/LLVM.cpp           | 8 ++++++++
 mlir/test/python/dialects/llvm.py        | 6 ++++++
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h
index c1ade9ed8617c..cc7f09f71d028 100644
--- a/mlir/include/mlir-c/Dialect/LLVM.h
+++ b/mlir/include/mlir-c/Dialect/LLVM.h
@@ -23,6 +23,8 @@ MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(LLVM, llvm);
 MLIR_CAPI_EXPORTED MlirType mlirLLVMPointerTypeGet(MlirContext ctx,
                                                    unsigned addressSpace);
 
+MLIR_CAPI_EXPORTED MlirTypeID mlirLLVMPointerTypeGetTypeID(void);
+
 /// Returns `true` if the type is an LLVM dialect pointer type.
 MLIR_CAPI_EXPORTED bool mlirTypeIsALLVMPointerType(MlirType type);
 
@@ -58,6 +60,8 @@ MLIR_CAPI_EXPORTED MlirType mlirLLVMFunctionTypeGetReturnType(MlirType type);
 /// Returns `true` if the type is an LLVM dialect struct type.
 MLIR_CAPI_EXPORTED bool mlirTypeIsALLVMStructType(MlirType type);
 
+MLIR_CAPI_EXPORTED MlirTypeID mlirLLVMStructTypeGetTypeID(void);
+
 /// Returns `true` if the type is a literal (unnamed) LLVM struct type.
 MLIR_CAPI_EXPORTED bool mlirLLVMStructTypeIsLiteral(MlirType type);
 
diff --git a/mlir/lib/Bindings/Python/DialectLLVM.cpp b/mlir/lib/Bindings/Python/DialectLLVM.cpp
index 870a713b8edcb..05681cecf82b3 100644
--- a/mlir/lib/Bindings/Python/DialectLLVM.cpp
+++ b/mlir/lib/Bindings/Python/DialectLLVM.cpp
@@ -31,8 +31,8 @@ static void populateDialectLLVMSubmodule(nanobind::module_ &m) {
   // StructType
   //===--------------------------------------------------------------------===//
 
-  auto llvmStructType =
-      mlir_type_subclass(m, "StructType", mlirTypeIsALLVMStructType);
+  auto llvmStructType = mlir_type_subclass(
+      m, "StructType", mlirTypeIsALLVMStructType, mlirLLVMStructTypeGetTypeID);
 
   llvmStructType
       .def_classmethod(
@@ -137,7 +137,8 @@ static void populateDialectLLVMSubmodule(nanobind::module_ &m) {
   // PointerType
   //===--------------------------------------------------------------------===//
 
-  mlir_type_subclass(m, "PointerType", mlirTypeIsALLVMPointerType)
+  mlir_type_subclass(m, "PointerType", mlirTypeIsALLVMPointerType,
+                     mlirLLVMPointerTypeGetTypeID)
       .def_classmethod(
           "get",
           [](const nb::object &cls, std::optional<unsigned> addressSpace,
diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp
index 6636f0ea73ec9..bf231767320a5 100644
--- a/mlir/lib/CAPI/Dialect/LLVM.cpp
+++ b/mlir/lib/CAPI/Dialect/LLVM.cpp
@@ -27,6 +27,10 @@ MlirType mlirLLVMPointerTypeGet(MlirContext ctx, unsigned addressSpace) {
   return wrap(LLVMPointerType::get(unwrap(ctx), addressSpace));
 }
 
+MlirTypeID mlirLLVMPointerTypeGetTypeID() {
+  return wrap(LLVM::LLVMPointerType::getTypeID());
+}
+
 bool mlirTypeIsALLVMPointerType(MlirType type) {
   return isa<LLVM::LLVMPointerType>(unwrap(type));
 }
@@ -73,6 +77,10 @@ bool mlirTypeIsALLVMStructType(MlirType type) {
   return isa<LLVM::LLVMStructType>(unwrap(type));
 }
 
+MlirTypeID mlirLLVMStructTypeGetTypeID() {
+  return wrap(LLVM::LLVMStructType::getTypeID());
+}
+
 bool mlirLLVMStructTypeIsLiteral(MlirType type) {
   return !cast<LLVM::LLVMStructType>(unwrap(type)).isIdentified();
 }
diff --git a/mlir/test/python/dialects/llvm.py b/mlir/test/python/dialects/llvm.py
index 8ea0fddee3f7c..305ed9aba940d 100644
--- a/mlir/test/python/dialects/llvm.py
+++ b/mlir/test/python/dialects/llvm.py
@@ -98,6 +98,9 @@ def testStructType():
     assert opaque.opaque
     # CHECK: !llvm.struct<"opaque", opaque>
 
+    typ = Type.parse('!llvm.struct<"zoo", (i32, i64)>')
+    assert isinstance(typ, llvm.StructType)
+
 
 # CHECK-LABEL: testSmoke
 @constructAndPrintInModule
@@ -120,6 +123,9 @@ def testPointerType():
     # CHECK: !llvm.ptr<1>
     print(ptr_with_addr)
 
+    typ = Type.parse("!llvm.ptr<1>")
+    assert isinstance(typ, llvm.PointerType)
+
 
 # CHECK-LABEL: testConstant
 @constructAndPrintInModule

From d4cd331b7efc8cd5f15faa846697d9d61b0ff246 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Mon, 24 Nov 2025 18:39:29 +0000
Subject: [PATCH 29/37] [gn build] Port 2bdd1357c826

---
 llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 09d2f1ed92554..82fe916645635 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -1167,7 +1167,6 @@ if (current_toolchain == default_toolchain) {
       "__locale_dir/locale_base_api.h",
       "__locale_dir/locale_base_api/bsd_locale_fallbacks.h",
       "__locale_dir/locale_base_api/ibm.h",
-      "__locale_dir/locale_base_api/musl.h",
       "__locale_dir/locale_base_api/openbsd.h",
       "__locale_dir/messages.h",
       "__locale_dir/money.h",

From 0e86510c787d68f5f87708b2efdaf92a7501b6c3 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Mon, 24 Nov 2025 18:39:30 +0000
Subject: [PATCH 30/37] [gn build] Port 3773bbe9e791

---
 llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn   | 2 ++
 llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn
index 66dbf6152472a..9b524e2ef7cd5 100644
--- a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn
@@ -29,6 +29,8 @@ static_library("Driver") {
   sources = [
     "Action.cpp",
     "Compilation.cpp",
+    "CreateASTUnitFromArgs.cpp",
+    "CreateInvocationFromArgs.cpp",
     "Distro.cpp",
     "Driver.cpp",
     "Job.cpp",
diff --git a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn
index 4009cfc609f4a..cdf39d645bc52 100644
--- a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn
@@ -28,7 +28,6 @@ static_library("Frontend") {
     "ChainedIncludesSource.cpp",
     "CompilerInstance.cpp",
     "CompilerInvocation.cpp",
-    "CreateInvocationFromCommandLine.cpp",
     "DependencyFile.cpp",
     "DependencyGraph.cpp",
     "DiagnosticRenderer.cpp",
@@ -48,6 +47,7 @@ static_library("Frontend") {
     "SARIFDiagnosticPrinter.cpp",
     "SerializedDiagnosticPrinter.cpp",
     "SerializedDiagnosticReader.cpp",
+    "StandaloneDiagnostic.cpp",
     "TestModuleFileExtension.cpp",
     "TextDiagnostic.cpp",
     "TextDiagnosticBuffer.cpp",

From 40fb2ca506a873b031f90dac619ccca1d6ff0de5 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Mon, 24 Nov 2025 18:39:31 +0000
Subject: [PATCH 31/37] [gn build] Port 645e0dcbff33

---
 llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
index ad72c0069237d..e54797e188a11 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
@@ -165,6 +165,7 @@ static_library("LLVMRISCVCodeGen") {
     "RISCVVectorMaskDAGMutation.cpp",
     "RISCVVectorPeephole.cpp",
     "RISCVZacasABIFix.cpp",
+    "RISCVZilsdOptimizer.cpp",
   ]
 }
 

From 445956443bdf5dcc7fb8beb7dd9e571f31551519 Mon Sep 17 00:00:00 2001
From: Henry Baba-Weiss <henry.babaweiss@gmail.com>
Date: Mon, 24 Nov 2025 10:49:41 -0800
Subject: [PATCH 32/37] [clang][Sema] Handle target_clones redeclarations that
 omit the attribute (#169259)

This patch adds a case to `CheckMultiVersionAdditionalDecl()` that
detects redeclarations of `target_clones` functions which omit the
attribute, and makes sure they are marked as redeclarations. It also
updates the comment at the call site of
`CheckMultiVersionAdditionalDecl()` to reflect this.

Previously, `target_clones` multiversioned functions that omitted the
attribute from subsequent declarations would cause Clang to hit an
`llvm_unreachable` and crash. In the following example, the second
declaration (the function definition) should inherit the `target_clones`
attribute from the first declaration (the forward declaration):

```
__attribute__((target_clones("arch=atom", "default")))
void foo(void);

void foo(void) { /* ... */ }
```

However, `CheckMultiVersionAdditionalDecl()` was not recognizing the
function definition as a redeclaration of the forward declaration, which
prevented `Sema::MergeFunctionDecl()` from automatically inheriting the
attribute.

A side effect of this fix is that Clang now catches redeclarations of
`target_clones` functions that have conflicting types, which previously
caused Clang to crash by hitting that same `llvm_unreachable`. The
`bad_overload1` case in `clang/test/Sema/attr-target-clones.c` has been
updated to reflect this.

Fixes #165517
Fixes #129483
---
 clang/docs/ReleaseNotes.rst             |  2 ++
 clang/lib/Sema/SemaDecl.cpp             | 15 +++++++++++--
 clang/test/CodeGen/attr-target-clones.c | 29 +++++++++++++++++++++++++
 clang/test/Sema/attr-target-clones.c    | 13 +++++++++++
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 439e47b209b2f..b12e4539dc3a6 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -491,6 +491,8 @@ Bug Fixes in This Version
 - Accept empty enumerations in MSVC-compatible C mode. (#GH114402)
 - Fix a bug leading to incorrect code generation with complex number compound assignment and bitfield values, which also caused a crash with UBsan. (#GH166798)
 - Fixed false-positive shadow diagnostics for lambdas in explicit object member functions. (#GH163731)
+- Fix a crash when a redeclaration of a ``target_clones`` multiversioned
+  function omits the attribute. (#GH165517) (#GH129483)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index b8ca2a376fde8..651437a6f4c30 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -11996,6 +11996,16 @@ static bool CheckMultiVersionAdditionalDecl(
     }
   }
 
+  // Redeclarations of a target_clones function may omit the attribute, in which
+  // case it will be inherited during declaration merging.
+  if (NewMVKind == MultiVersionKind::None &&
+      OldMVKind == MultiVersionKind::TargetClones) {
+    NewFD->setIsMultiVersion();
+    Redeclaration = true;
+    OldDecl = OldFD;
+    return false;
+  }
+
   // Else, this is simply a non-redecl case.  Checking the 'value' is only
   // necessary in the Target case, since The CPUSpecific/Dispatch cases are
   // handled in the attribute adding step.
@@ -12119,8 +12129,9 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
   }
 
   // At this point, we have a multiversion function decl (in OldFD) AND an
-  // appropriate attribute in the current function decl.  Resolve that these are
-  // still compatible with previous declarations.
+  // appropriate attribute in the current function decl (unless it's allowed to
+  // omit the attribute).  Resolve that these are still compatible with previous
+  // declarations.
   return CheckMultiVersionAdditionalDecl(S, OldFD, NewFD, NewCPUDisp,
                                          NewCPUSpec, NewClones, Redeclaration,
                                          OldDecl, Previous);
diff --git a/clang/test/CodeGen/attr-target-clones.c b/clang/test/CodeGen/attr-target-clones.c
index 295b25d6478eb..56db77c2b09a3 100644
--- a/clang/test/CodeGen/attr-target-clones.c
+++ b/clang/test/CodeGen/attr-target-clones.c
@@ -125,6 +125,35 @@ void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {}
 // WINDOWS: musttail call void @unused.arch_ivybridge.0
 // WINDOWS: musttail call void @unused.default.1
 
+int __attribute__((target_clones("sse4.2, default"))) inherited(void);
+int inherited(void) { return 0; }
+// LINUX: define {{.*}}i32 @inherited.sse4.2.0()
+// LINUX: define {{.*}}i32 @inherited.default.1()
+// LINUX: define weak_odr ptr @inherited.resolver() #[[ATTR_RESOLVER]] comdat
+// LINUX: ret ptr @inherited.sse4.2.0
+// LINUX: ret ptr @inherited.default.1
+
+// DARWIN: define {{.*}}i32 @inherited.sse4.2.0()
+// DARWIN: define {{.*}}i32 @inherited.default.1()
+// DARWIN: define weak_odr ptr @inherited.resolver() #[[ATTR_RESOLVER]] {
+// DARWIN: ret ptr @inherited.sse4.2.0
+// DARWIN: ret ptr @inherited.default.1
+
+// WINDOWS: define dso_local i32 @inherited.sse4.2.0()
+// WINDOWS: define dso_local i32 @inherited.default.1()
+// WINDOWS: define weak_odr dso_local i32 @inherited() #[[ATTR_RESOLVER]] comdat
+// WINDOWS: musttail call i32 @inherited.sse4.2.0
+// WINDOWS: musttail call i32 @inherited.default.1
+
+int test_inherited(void) {
+  // LINUX: define {{.*}}i32 @test_inherited() #[[DEF:[0-9]+]]
+  // DARWIN: define {{.*}}i32 @test_inherited() #[[DEF:[0-9]+]]
+  // WINDOWS: define dso_local i32 @test_inherited() #[[DEF:[0-9]+]]
+  return inherited();
+  // LINUX: call i32 @inherited()
+  // DARWIN: call i32 @inherited()
+  // WINDOWS: call i32 @inherited()
+}
 
 inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
 foo_inline(void) { return 0; }
diff --git a/clang/test/Sema/attr-target-clones.c b/clang/test/Sema/attr-target-clones.c
index 4597ea54d02bf..40688772eeb96 100644
--- a/clang/test/Sema/attr-target-clones.c
+++ b/clang/test/Sema/attr-target-clones.c
@@ -28,6 +28,17 @@ int __attribute__((target_clones("sse4.2", "arch=atom", "default"))) redecl4(voi
 int __attribute__((target_clones("sse4.2", "arch=sandybridge", "default")))
 redecl4(void) { return 1; }
 
+int __attribute__((target_clones("sse4.2", "default"))) redecl5(void);
+int redecl5(void) { return 1; }
+
+int redecl6(void);
+int __attribute__((target_clones("sse4.2", "default"))) redecl6(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2", "default"))) redecl7(void);
+// expected-error@+2 {{multiversioning attributes cannot be combined}}
+// expected-note@-2 {{previous declaration is here}}
+int __attribute__((target("sse4.2"))) redecl7(void) { return 1; }
+
 int __attribute__((target("sse4.2"))) redef2(void) { return 1; }
 // expected-error@+2 {{multiversioning attributes cannot be combined}}
 // expected-note@-2 {{previous declaration is here}}
@@ -87,6 +98,8 @@ int useage(void) {
 int __attribute__((target_clones("sse4.2", "default"))) mv_after_use(void) { return 1; }
 
 void bad_overload1(void) __attribute__((target_clones("mmx", "sse4.2", "default")));
+// expected-error@+2 {{conflicting types for 'bad_overload1'}}
+// expected-note@-2 {{previous declaration is here}}
 void bad_overload1(int p) {}
 
 void bad_overload2(int p) {}

From f5e228b32ac0a59b5aa834caa80150ba877e82ce Mon Sep 17 00:00:00 2001
From: "Deric C." <cheung.deric@gmail.com>
Date: Mon, 24 Nov 2025 10:56:20 -0800
Subject: [PATCH 33/37] [DirectX] Simplify DXIL data scalarization, and data
 scalarize whole GEP chains (#168096)

- The DXIL data scalarizer only needs to change vectors into arrays. It
does not need to change the types of GEPs to match the pointer type.
This PR simplifies the `visitGetElementPtrInst` method to do just that
while also accounting for nested GEPs from ConstantExprs. (Before this
PR, there were still vector types lingering in nested GEPs with
ConstantExprs.)
- The `equivalentArrayTypeFromVector` function was awkwardly placed near
the top of the file and away from the other helper functions. The
function is now moved next to the other helper functions.
- Removed an unnecessary `||` condition from `isVectorOrArrayOfVectors`.

Related tests have also been cleaned up, and the test CHECKs have been
modified to account for the new simplified behavior.
---
 .../Target/DirectX/DXILDataScalarization.cpp  | 131 +++++++-----------
 .../bugfix_150050_data_scalarize_const_gep.ll |  35 +++--
 llvm/test/CodeGen/DirectX/scalarize-alloca.ll |   6 +-
 llvm/test/CodeGen/DirectX/scalarize-global.ll |   6 +-
 4 files changed, 75 insertions(+), 103 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 9f1616f6960fe..5f18c37ef1125 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -29,20 +29,6 @@ static const int MaxVecSize = 4;
 
 using namespace llvm;
 
-// Recursively creates an array-like version of a given vector type.
-static Type *equivalentArrayTypeFromVector(Type *T) {
-  if (auto *VecTy = dyn_cast<VectorType>(T))
-    return ArrayType::get(VecTy->getElementType(),
-                          dyn_cast<FixedVectorType>(VecTy)->getNumElements());
-  if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
-    Type *NewElementType =
-        equivalentArrayTypeFromVector(ArrayTy->getElementType());
-    return ArrayType::get(NewElementType, ArrayTy->getNumElements());
-  }
-  // If it's not a vector or array, return the original type.
-  return T;
-}
-
 class DXILDataScalarizationLegacy : public ModulePass {
 
 public:
@@ -121,12 +107,25 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) {
 static bool isVectorOrArrayOfVectors(Type *T) {
   if (isa<VectorType>(T))
     return true;
-  if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
-    return isa<VectorType>(ArrType->getElementType()) ||
-           isVectorOrArrayOfVectors(ArrType->getElementType());
+  if (ArrayType *ArrayTy = dyn_cast<ArrayType>(T))
+    return isVectorOrArrayOfVectors(ArrayTy->getElementType());
   return false;
 }
 
+// Recursively converts vectors in T to arrays; vectors must be fixed-width.
+static Type *equivalentArrayTypeFromVector(Type *T) {
+  if (auto *VecTy = dyn_cast<VectorType>(T))
+    return ArrayType::get(VecTy->getElementType(),
+                          cast<FixedVectorType>(VecTy)->getNumElements());
+  if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
+    Type *NewElementType =
+        equivalentArrayTypeFromVector(ArrayTy->getElementType());
+    return ArrayType::get(NewElementType, ArrayTy->getNumElements());
+  }
+  // If it's not a vector or array, return the original type.
+  return T;
+}
+
 bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
   Type *AllocatedType = AI.getAllocatedType();
   if (!isVectorOrArrayOfVectors(AllocatedType))
@@ -135,7 +134,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
   IRBuilder<> Builder(&AI);
   Type *NewType = equivalentArrayTypeFromVector(AllocatedType);
   AllocaInst *ArrAlloca =
-      Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize");
+      Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarized");
   ArrAlloca->setAlignment(AI.getAlign());
   AI.replaceAllUsesWith(ArrAlloca);
   AI.eraseFromParent();
@@ -303,78 +302,44 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
 bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
   GEPOperator *GOp = cast<GEPOperator>(&GEPI);
   Value *PtrOperand = GOp->getPointerOperand();
-  Type *NewGEPType = GOp->getSourceElementType();
-
-  // Unwrap GEP ConstantExprs to find the base operand and element type
-  while (auto *GEPCE = dyn_cast_or_null<GEPOperator>(
-             dyn_cast<ConstantExpr>(PtrOperand))) {
-    GOp = GEPCE;
-    PtrOperand = GEPCE->getPointerOperand();
-    NewGEPType = GEPCE->getSourceElementType();
-  }
-
-  Type *const OrigGEPType = NewGEPType;
-  Value *const OrigOperand = PtrOperand;
-
-  if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
-    NewGEPType = NewGlobal->getValueType();
-    PtrOperand = NewGlobal;
-  } else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
-    Type *AllocatedType = Alloca->getAllocatedType();
-    if (isa<ArrayType>(AllocatedType) &&
-        AllocatedType != GOp->getResultElementType())
-      NewGEPType = AllocatedType;
-  } else
-    return false; // Only GEPs into an alloca or global variable are considered
-
-  // Defer changing i8 GEP types until dxil-flatten-arrays
-  if (OrigGEPType->isIntegerTy(8))
-    NewGEPType = OrigGEPType;
-
-  // If the original type is a "sub-type" of the new type, then ensure the gep
-  // correctly zero-indexes the extra dimensions to keep the offset calculation
-  // correct.
-  // Eg:
-  //  i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc.
-  //
-  // So then:
-  //   gep [4 x i32] %idx
-  //     -> gep [8 x [4 x i32]], i32 0, i32 %idx
-  //   gep i32 %idx
-  //     -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx
-  uint32_t MissingDims = 0;
-  Type *SubType = NewGEPType;
-
-  // The new type will be in its array version; so match accordingly.
-  Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType);
-
-  while (SubType != GEPArrType) {
-    MissingDims++;
-
-    ArrayType *ArrType = dyn_cast<ArrayType>(SubType);
-    if (!ArrType) {
-      assert(SubType == GEPArrType &&
-             "GEP uses an DXIL invalid sub-type of alloca/global variable");
-      break;
-    }
-
-    SubType = ArrType->getElementType();
+  Type *GEPType = GOp->getSourceElementType();
+
+  // Replace a GEP ConstantExpr pointer operand with a GEP instruction so that
+  // it can be visited
+  if (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand);
+      PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) {
+    GetElementPtrInst *OldGEPI =
+        cast<GetElementPtrInst>(PtrOpGEPCE->getAsInstruction());
+    OldGEPI->insertBefore(GEPI.getIterator());
+
+    IRBuilder<> Builder(&GEPI);
+    SmallVector<Value *> Indices(GEPI.indices());
+    Value *NewGEP =
+        Builder.CreateGEP(GEPI.getSourceElementType(), OldGEPI, Indices,
+                          GEPI.getName(), GEPI.getNoWrapFlags());
+    assert(isa<GetElementPtrInst>(NewGEP) &&
+           "Expected newly-created GEP to be an instruction");
+    GetElementPtrInst *NewGEPI = cast<GetElementPtrInst>(NewGEP);
+
+    GEPI.replaceAllUsesWith(NewGEPI);
+    GEPI.eraseFromParent();
+    visitGetElementPtrInst(*OldGEPI);
+    visitGetElementPtrInst(*NewGEPI);
+    return true;
   }
 
-  bool NeedsTransform = OrigOperand != PtrOperand ||
-                        OrigGEPType != NewGEPType || MissingDims != 0;
+  Type *NewGEPType = equivalentArrayTypeFromVector(GEPType);
+  Value *NewPtrOperand = PtrOperand;
+  if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand))
+    NewPtrOperand = NewGlobal;
 
+  bool NeedsTransform = NewPtrOperand != PtrOperand || NewGEPType != GEPType;
   if (!NeedsTransform)
     return false;
 
   IRBuilder<> Builder(&GEPI);
-  SmallVector<Value *, MaxVecSize> Indices;
-
-  for (uint32_t I = 0; I < MissingDims; I++)
-    Indices.push_back(Builder.getInt32(0));
-  llvm::append_range(Indices, GOp->indices());
-
-  Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
+  SmallVector<Value *, MaxVecSize> Indices(GOp->idx_begin(), GOp->idx_end());
+  Value *NewGEP = Builder.CreateGEP(NewGEPType, NewPtrOperand, Indices,
                                     GOp->getName(), GOp->getNoWrapFlags());
 
   GOp->replaceAllUsesWith(NewGEP);
diff --git a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
index 156a8e7c5c386..def886f933d08 100644
--- a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
+++ b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
@@ -11,9 +11,10 @@ define void @CSMain() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16
 ;
-; SCHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2
-; SCHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16
-; SCHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
+; SCHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1
+; SCHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [10 x [4 x i32]], ptr addrspace(3) [[GEP0]], i32 0, i32 2
+; SCHECK-NEXT:    [[LOAD:%.*]] = load <4 x i32>, ptr addrspace(3) [[GEP1]], align 16
+; SCHECK-NEXT:    store <4 x i32> [[LOAD]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
 ;
 ; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I14:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 1
 ; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I25:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 2
@@ -40,12 +41,13 @@ define void @Main() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[BFRAGPACKED_I:%.*]] = alloca i32, align 16
 ;
-; SCHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1
-; SCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16
-; SCHECK-NEXT:    store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16
+; SCHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1
+; SCHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) [[GEP0]], i32 0, i32 1
+; SCHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(3) [[GEP1]], align 16
+; SCHECK-NEXT:    store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16
 ;
-; FCHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16
-; FCHECK-NEXT:    store i32 [[TMP0]], ptr [[BFRAGPACKED_I]], align 16
+; FCHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16
+; FCHECK-NEXT:    store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16
 ;
 ; CHECK-NEXT:    ret void
 entry:
@@ -57,10 +59,12 @@ entry:
 
 define void @global_nested_geps_3d() {
 ; CHECK-LABEL: define void @global_nested_geps_3d() {
-; SCHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
-; SCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+; SCHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1
+; SCHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP0]], i32 0, i32 1
+; SCHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP1]], i32 0, i32 1
+; SCHECK-NEXT:    load i32, ptr [[GEP2]], align 4
 ;
-; FCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4
+; FCHECK-NEXT:    load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4
 ;
 ; CHECK-NEXT:    ret void
   %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* @cTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
@@ -69,10 +73,13 @@ define void @global_nested_geps_3d() {
 
 define void @global_nested_geps_4d() {
 ; CHECK-LABEL: define void @global_nested_geps_4d() {
-; SCHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x <2 x i32>]], ptr getelementptr inbounds ([2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
-; SCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+; SCHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1
+; SCHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr [[GEP0]], i32 0, i32 1
+; SCHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP1]], i32 0, i32 1
+; SCHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP2]], i32 0, i32 1
+; SCHECK-NEXT:    load i32, ptr [[GEP3]], align 4
 ;
-; FCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4
+; FCHECK-NEXT:    load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4
 ;
 ; CHECK-NEXT:    ret void
   %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* getelementptr inbounds ([2 x [2 x [2 x <2 x i32>]]], [2 x [2 x [2 x <2 x i32>]]]* @dTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
diff --git a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
index 475935d2eb135..85e3bb0185e44 100644
--- a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
+++ b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
@@ -48,7 +48,7 @@ define void @subtype_array_test() {
   ; SCHECK:  [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
   ; FCHECK:  [[alloca_val:%.*]] = alloca [32 x i32], align 4
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
@@ -64,7 +64,7 @@ define void @subtype_vector_test() {
   ; SCHECK:  [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
   ; FCHECK:  [[alloca_val:%.*]] = alloca [32 x i32], align 4
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
@@ -80,7 +80,7 @@ define void @subtype_scalar_test() {
   ; SCHECK:  [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
   ; FCHECK:  [[alloca_val:%.*]] = alloca [32 x i32], align 4
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, ptr [[alloca_val]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
diff --git a/llvm/test/CodeGen/DirectX/scalarize-global.ll b/llvm/test/CodeGen/DirectX/scalarize-global.ll
index ca10f6ece5a85..c27dc4083bfd3 100644
--- a/llvm/test/CodeGen/DirectX/scalarize-global.ll
+++ b/llvm/test/CodeGen/DirectX/scalarize-global.ll
@@ -11,7 +11,7 @@
 ; CHECK-LABEL: subtype_array_test
 define <4 x i32> @subtype_array_test() {
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
@@ -26,7 +26,7 @@ define <4 x i32> @subtype_array_test() {
 ; CHECK-LABEL: subtype_vector_test
 define <4 x i32> @subtype_vector_test() {
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
@@ -41,7 +41,7 @@ define <4 x i32> @subtype_vector_test() {
 ; CHECK-LABEL: subtype_scalar_test
 define <4 x i32> @subtype_scalar_test() {
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]

From 4a0d4850d77c13b71cd0bdd40b38a5afc46fb62b Mon Sep 17 00:00:00 2001
From: Erick Velez <erickvelez7@gmail.com>
Date: Mon, 24 Nov 2025 11:01:34 -0800
Subject: [PATCH 34/37] [clang-doc] Add definition information to class
 templates (#169109)

---
 .../clang-doc/assets/class-template.mustache             | 1 +
 clang-tools-extra/test/clang-doc/namespace.cpp           | 9 ++++-----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang-tools-extra/clang-doc/assets/class-template.mustache b/clang-tools-extra/clang-doc/assets/class-template.mustache
index c5187026a2399..bbac3ffae02fb 100644
--- a/clang-tools-extra/clang-doc/assets/class-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/class-template.mustache
@@ -140,6 +140,7 @@
             <section class="hero section-container">
                 <div class="hero__title">
                     <h1 class="hero__title-large">{{TagType}} {{Name}}</h1>
+                    <p>Defined at line {{Location.LineNumber}} of file {{Location.Filename}}</p>
                     {{#Description}}
                     <div class="hero__subtitle">
                         {{>Comments}}
diff --git a/clang-tools-extra/test/clang-doc/namespace.cpp b/clang-tools-extra/test/clang-doc/namespace.cpp
index 28f9556b86218..adf7ab7d946ab 100644
--- a/clang-tools-extra/test/clang-doc/namespace.cpp
+++ b/clang-tools-extra/test/clang-doc/namespace.cpp
@@ -58,7 +58,6 @@
 
 // COM: FIXME: Add global functions to the namespace template
 // COM: FIXME: Add namespaces to the namespace template
-// COM: FIXME: Add class definition location to class template
 
 // Anonymous Namespace
 namespace {
@@ -70,7 +69,7 @@ void anonFunction() {}
 class AnonClass {};
 // MD-ANON-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANON-CLASS-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
-// MUSTACHE-ANON-CLASS-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-ANON-CLASS-LINE: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // MD-ANON-CLASS: # class AnonClass
 // HTML-ANON-CLASS: <h1>class AnonClass</h1>
@@ -117,7 +116,7 @@ void functionInPrimaryNamespace() {}
 class ClassInPrimaryNamespace {};
 // MD-PRIMARY-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-PRIMARY-CLASS-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
-// MUSTACHE-PRIMARY-CLASS-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-PRIMARY-CLASS-LINE: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // MD-PRIMARY-CLASS: # class ClassInPrimaryNamespace
 // MD-PRIMARY-CLASS: Class in PrimaryNamespace
@@ -139,7 +138,7 @@ void functionInNestedNamespace() {}
 class ClassInNestedNamespace {};
 // MD-NESTED-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-NESTED-CLASS-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
-// MUSTACHE-NESTED-CLASS-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-NESTED-CLASS-LINE: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // MD-NESTED-CLASS: # class ClassInNestedNamespace
 // MD-NESTED-CLASS: Class in NestedNamespace
@@ -233,7 +232,7 @@ void functionInAnotherNamespace() {}
 class ClassInAnotherNamespace {};
 // MD-ANOTHER-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANOTHER-CLASS-LINE: <p>Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
-// MUSTACHE-ANOTHER-CLASS-LINE-NOT: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
+// MUSTACHE-ANOTHER-CLASS-LINE: <p>Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp</p>
 
 // MD-ANOTHER-CLASS: # class ClassInAnotherNamespace
 // MD-ANOTHER-CLASS:  Class in AnotherNamespace

From 658675fad794197a2a41207b8e4b422becd78f28 Mon Sep 17 00:00:00 2001
From: Erich Keane <ekeane@nvidia.com>
Date: Mon, 24 Nov 2025 11:06:15 -0800
Subject: [PATCH 35/37] [OpenACC][CIR] 'device_resident' clause lowering for
 local declare (#169389)

Just like the last handful of clauses, this is a pretty simple one: it
lowers device_resident (entry op: declare_device_resident; exit op:
delete).  This should be the last of the 'local' declare patches.
---
 clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp   |  16 +-
 clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp |  12 ++
 .../CodeGenOpenACC/declare-deviceresident.cpp | 199 ++++++++++++++++++
 3 files changed, 219 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp

diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
index 9c1aeb87c8029..41a193e4d85c5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
@@ -19,12 +19,9 @@ using namespace clang::CIRGen;
 
 namespace {
 struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup {
-  SourceRange declareRange;
   mlir::acc::DeclareEnterOp enterOp;
 
-  OpenACCDeclareCleanup(SourceRange declareRange,
-                        mlir::acc::DeclareEnterOp enterOp)
-      : declareRange(declareRange), enterOp(enterOp) {}
+  OpenACCDeclareCleanup(mlir::acc::DeclareEnterOp enterOp) : enterOp(enterOp) {}
 
   template <typename OutTy, typename InTy>
   void createOutOp(CIRGenFunction &cgf, InTy inOp) {
@@ -78,8 +75,11 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup {
           createOutOp<mlir::acc::DeleteOp>(cgf, create);
           break;
         }
-      } else if (auto create = val.getDefiningOp<mlir::acc::PresentOp>()) {
-        createOutOp<mlir::acc::DeleteOp>(cgf, create);
+      } else if (auto present = val.getDefiningOp<mlir::acc::PresentOp>()) {
+        createOutOp<mlir::acc::DeleteOp>(cgf, present);
+      } else if (auto dev_res =
+                     val.getDefiningOp<mlir::acc::DeclareDeviceResidentOp>()) {
+        createOutOp<mlir::acc::DeleteOp>(cgf, dev_res);
       } else if (val.getDefiningOp<mlir::acc::DeclareLinkOp>()) {
         // Link has no exit clauses, and shouldn't be copied.
         continue;
@@ -87,7 +87,7 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup {
         // DevicePtr has no exit clauses, and shouldn't be copied.
         continue;
       } else {
-        cgf.cgm.errorNYI(declareRange, "OpenACC local declare clause cleanup");
+        llvm_unreachable("OpenACC local declare clause unexpected defining op");
         continue;
       }
       exitOp.getDataClauseOperandsMutable().append(val);
@@ -106,7 +106,7 @@ void CIRGenFunction::emitOpenACCDeclare(const OpenACCDeclareDecl &d) {
                      d.clauses());
 
   ehStack.pushCleanup<OpenACCDeclareCleanup>(CleanupKind::NormalCleanup,
-                                             d.getSourceRange(), enterOp);
+                                             enterOp);
 }
 
 void CIRGenFunction::emitOpenACCRoutine(const OpenACCRoutineDecl &d) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index a23ec93ab1d75..60a089fe0e936 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -1135,6 +1135,18 @@ class OpenACCClauseCIREmitter final
       llvm_unreachable("Unknown construct kind in VisitReductionClause");
     }
   }
+
+  void VisitDeviceResidentClause(const OpenACCDeviceResidentClause &clause) {
+    if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) {
+      for (const Expr *var : clause.getVarList())
+        addDataOperand<mlir::acc::DeclareDeviceResidentOp>(
+            var, mlir::acc::DataClause::acc_declare_device_resident, {},
+            /*structured=*/true,
+            /*implicit=*/false);
+    } else {
+      llvm_unreachable("Unknown construct kind in VisitDeviceResidentClause");
+    }
+  }
 };
 
 template <typename OpTy>
diff --git a/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp b/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp
new file mode 100644
index 0000000000000..dbec4f22a1bb3
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp
@@ -0,0 +1,199 @@
+// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
+
+struct HasSideEffects {
+  HasSideEffects();
+  ~HasSideEffects();
+};
+
+// TODO: OpenACC: Implement 'global', NS lowering.
+
+struct Struct {
+  static const HasSideEffects StaticMemHSE;
+  static const HasSideEffects StaticMemHSEArr[5];
+  static const int StaticMemInt;
+
+  // TODO: OpenACC: Implement static-local lowering.
+
+  void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr<!rec_Struct>{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: cir.alloca{{.*}}["this"
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.load
+
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    int LocalInt;
+
+#pragma acc declare device_resident(ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1])
+    // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "ArgInt"} 
+    // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    //
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_DEV_RES]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "ArgInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_DEV_RES]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "LocalInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "LocalHSEArr[1:1]"}
+  }
+  void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr);
+};
+
+void use() {
+  Struct s;
+  s.MemFunc1(HasSideEffects{}, 0, nullptr);
+}
+
+void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr<!rec_Struct>{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: cir.alloca{{.*}}["this"
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.load
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    // CHECK: do {
+    // CHECK: } while {
+    // CHECK: }
+    int LocalInt;
+#pragma acc declare device_resident(ArgHSE, ArgInt, ArgHSEPtr[1:1])
+    // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "ArgInt"} 
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+
+#pragma acc declare device_resident(LocalHSE, LocalInt, LocalHSEArr[1:1])
+    // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_DEV_RES]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "LocalInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "LocalHSEArr[1:1]"}
+    //
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_DEV_RES]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "ArgInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "ArgHSEPtr[1:1]"}
+}
+
+extern "C" void do_thing();
+
+extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) {
+    // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr<!rec_HasSideEffects>{{.*}})
+    // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE"
+    // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt
+    // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_HasSideEffects>{{.*}}["ArgHSEPtr"
+    // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE
+    // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasSideEffects x 5>{{.*}}["LocalHSEArr
+    // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    // CHECK-NEXT: cir.store
+    HasSideEffects LocalHSE;
+    // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr<!rec_HasSideEffects>) -> ()
+    HasSideEffects LocalHSEArr[5];
+    // CHECK: do {
+    // CHECK: } while {
+    // CHECK: }
+    int LocalInt;
+#pragma acc declare device_resident(ArgHSE, ArgInt, ArgHSEPtr[1:1])
+    // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "ArgHSE"}
+    // CHECK-NEXT: %[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "ArgInt"} 
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) -> !cir.ptr<!cir.ptr<!rec_HasSideEffects>> {name = "ArgHSEPtr[1:1]"}
+    // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+    {
+      // CHECK-NEXT: cir.scope {
+#pragma acc declare device_resident(LocalHSE, LocalInt, LocalHSEArr[1:1])
+    // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr<!rec_HasSideEffects>) -> !cir.ptr<!rec_HasSideEffects> {name = "LocalHSE"} 
+    // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "LocalInt"}
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+    // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64
+    // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64
+    // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64)
+    // CHECK-NEXT: %[[LOC_HSE_ARR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) -> !cir.ptr<!cir.array<!rec_HasSideEffects x 5>> {name = "LocalHSEArr[1:1]"}
+    // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+
+    do_thing();
+    // CHECK-NEXT: cir.call @do_thing
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>)
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "LocalHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_DEV_RES]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "LocalInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr<!cir.array<!rec_HasSideEffects x 5>>) bounds(%[[BOUND2]]) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "LocalHSEArr[1:1]"}
+    }
+    // CHECK-NEXT: }
+
+    // Make sure that cleanup gets put in the right scope.
+    do_thing();
+    // CHECK-NEXT: cir.call @do_thing
+    // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>, !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!rec_HasSideEffects>>)
+ 
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_DEV_RES]] : !cir.ptr<!rec_HasSideEffects>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "ArgHSE"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_DEV_RES]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "ArgInt"}
+    // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr<!cir.ptr<!rec_HasSideEffects>>) bounds(%[[BOUND1]]) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "ArgHSEPtr[1:1]"}
+}
+

From 0549aa11c2c1b619c673a0644a25f939bf13746f Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12@gmail.com>
Date: Tue, 25 Nov 2025 04:15:08 +0900
Subject: [PATCH 36/37] [llvm][dsymutil] Use the DW_AT_name of the uniqued DIE
 for insertion into .debug_names (#168513)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Depends on:
* https://github.com/llvm/llvm-project/pull/168895

Note, the last commit is the one with the actual fix. The others are
drive-by/test changes

We've been seeing dsymutil verification failures like:
```
error: Name Index @ 0x0: Entry @ 0x11949d: mismatched Name of DIE @ 0x9c644c:
index - apply<(lambda at /some/build/dir/lib/LLVMSupport/include/llvm/Support/Error.h:1070:35)>;
debug_info - apply<(lambda at /some/build/dir/lib/LLVMCustom/include/llvm/Support/Error.h:1070:35)>
apply, _ZN11custom_llvm18ErrorHandlerTraitsIRFvRNS_13ErrorInfoBaseEEE5applyIZNS_12consumeErrorENS_5ErrorEEUlRKS1_E_EES7_OT_NSt3__110unique_ptrIS1_NSD_14default_deleteIS1_EEEE.
```
Note how the name of the DIE has a different lambda path than the one
that was used to insert the DIE into debug_names.

The root cause of the issue is that we have a DW_TAG_subprogram
definition whose DW_AT_specification DIE got deduplicated. But the
DW_AT_name of the original specification is different than the one it
got uniqued to. That’s technically fine because dsymutil uniques by
linkage name, which uniquely identifies any function with non-internal
linkage.

But we insert the definition DIE into the debug-names table using the
DW_AT_name of the original specification (we call
`getDIENames(InputDIE…)`). But what we really want to do is use the name
of the adjusted `DW_AT_specification` (i.e., the `DW_AT_specification` of
the output DIE). That’s not as simple as it sounds because we can’t just
get ahold of the DIE in the output CU. We have to grab the ODR
`DeclContext` of the input DIE’s specification. That is the only link
back to the canonical specification DIE. For that to be of any use, we
have to stash the `DW_AT_name` into `DeclContext` so we can use it in
`getDIENames`.

We have to account for the possibility of multiple levels of
`DW_AT_specification`/`DW_AT_abstract_origin`. So my proposed solution
is to recursively scan the referenced DIEs, grab the canonical DIE for
those and get the name from the `DeclContext` (if none exists then use
the `DW_AT_name` of the DIE itself).

One remaining question is whether we need to handle the case where a DIE
has a `DW_AT_specification` *and* a `DW_AT_abstract_origin`? That
complicates the way we locate `DW_AT_name`. We'd have to adjust
`getCanonicalDIEName` to handle this. But it's not clear what a
`DW_AT_name` would be for such cases. Worst case at the moment we take
the wrong path up the specifications and don't find any `DW_AT_name`,
and don't end up indexing that DIE. Something to keep an eye out for.

rdar://149239553
---
 .../llvm/DWARFLinker/Classic/DWARFLinker.h    |   6 +-
 .../Classic/DWARFLinkerDeclContext.h          |  14 ++--
 llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp  |  76 ++++++++++++++++--
 .../Classic/DWARFLinkerDeclContext.cpp        |  32 ++++----
 .../AArch64/dummy-debug-map-arm64.map         |   4 +
 .../AArch64/dwarf5-str-offsets-base-strx.test |   2 +-
 .../tools/dsymutil/AArch64/inlined-low_pc.c   |   4 +-
 .../odr-uniquing-DW_AT_name-conflict.test     |  28 +++++++
 .../odr-uniquing-DW_AT_name-conflict/1.o      | Bin 0 -> 2416 bytes
 .../odr-uniquing-DW_AT_name-conflict/2.o      | Bin 0 -> 2416 bytes
 .../odr-uniquing-DW_AT_name-conflict/lib1.cpp |   5 ++
 .../odr-uniquing-DW_AT_name-conflict/lib1.h   |   3 +
 .../odr-uniquing-DW_AT_name-conflict/lib2.cpp |   5 ++
 .../odr-uniquing-DW_AT_name-conflict/main.cpp |   6 ++
 14 files changed, 155 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test
 create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/1.o
 create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/2.o
 create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp
 create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h
 create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp
 create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp

diff --git a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h
index 5b9535380aebf..d6f5d926f022c 100644
--- a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h
@@ -708,7 +708,11 @@ class LLVM_ABI DWARFLinker : public DWARFLinkerBase {
     /// already there.
     /// \returns is a name was found.
     bool getDIENames(const DWARFDie &Die, AttributesInfo &Info,
-                     OffsetsStringPool &StringPool, bool StripTemplate = false);
+                     OffsetsStringPool &StringPool, const DWARFFile &File,
+                     CompileUnit &Unit, bool StripTemplate = false);
+
+    llvm::StringRef getCanonicalDIEName(DWARFDie Die, const DWARFFile &File,
+                                        CompileUnit *Unit);
 
     uint32_t hashFullyQualifiedName(DWARFDie DIE, CompileUnit &U,
                                     const DWARFFile &File,
diff --git a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h
index 9fb1b3f80e2ff..5ced6d05cc231 100644
--- a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h
+++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h
@@ -84,11 +84,13 @@ class DeclContext {
   DeclContext() : DefinedInClangModule(0), Parent(*this) {}
 
   DeclContext(unsigned Hash, uint32_t Line, uint32_t ByteSize, uint16_t Tag,
-              StringRef Name, StringRef File, const DeclContext &Parent,
-              DWARFDie LastSeenDIE = DWARFDie(), unsigned CUId = 0)
+              StringRef Name, StringRef NameForUniquing, StringRef File,
+              const DeclContext &Parent, DWARFDie LastSeenDIE = DWARFDie(),
+              unsigned CUId = 0)
       : QualifiedNameHash(Hash), Line(Line), ByteSize(ByteSize), Tag(Tag),
-        DefinedInClangModule(0), Name(Name), File(File), Parent(Parent),
-        LastSeenDIE(LastSeenDIE), LastSeenCompileUnitID(CUId) {}
+        DefinedInClangModule(0), Name(Name), NameForUniquing(NameForUniquing),
+        File(File), Parent(Parent), LastSeenDIE(LastSeenDIE),
+        LastSeenCompileUnitID(CUId) {}
 
   uint32_t getQualifiedNameHash() const { return QualifiedNameHash; }
 
@@ -100,6 +102,7 @@ class DeclContext {
 
   uint32_t getCanonicalDIEOffset() const { return CanonicalDIEOffset; }
   void setCanonicalDIEOffset(uint32_t Offset) { CanonicalDIEOffset = Offset; }
+  llvm::StringRef getCanonicalName() const { return Name; }
 
   bool isDefinedInClangModule() const { return DefinedInClangModule; }
   void setDefinedInClangModule(bool Val) { DefinedInClangModule = Val; }
@@ -115,6 +118,7 @@ class DeclContext {
   uint16_t Tag = dwarf::DW_TAG_compile_unit;
   unsigned DefinedInClangModule : 1;
   StringRef Name;
+  StringRef NameForUniquing;
   StringRef File;
   const DeclContext &Parent;
   DWARFDie LastSeenDIE;
@@ -180,7 +184,7 @@ struct DeclMapInfo : private DenseMapInfo<DeclContext *> {
       return RHS == LHS;
     return LHS->QualifiedNameHash == RHS->QualifiedNameHash &&
            LHS->Line == RHS->Line && LHS->ByteSize == RHS->ByteSize &&
-           LHS->Name.data() == RHS->Name.data() &&
+           LHS->NameForUniquing.data() == RHS->NameForUniquing.data() &&
            LHS->File.data() == RHS->File.data() &&
            LHS->Parent.QualifiedNameHash == RHS->Parent.QualifiedNameHash;
   }
diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
index 8637b55c78f9c..daf3788639451 100644
--- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
@@ -151,22 +151,84 @@ static bool isTypeTag(uint16_t Tag) {
   return false;
 }
 
-bool DWARFLinker::DIECloner::getDIENames(const DWARFDie &Die,
-                                         AttributesInfo &Info,
-                                         OffsetsStringPool &StringPool,
-                                         bool StripTemplate) {
+/// Follow the input DIE's DW_AT_specification/DW_AT_abstract_origin chain
+/// until a DW_AT_name is found, preferring the canonical DIE's name.
+llvm::StringRef
+DWARFLinker::DIECloner::getCanonicalDIEName(DWARFDie Die, const DWARFFile &File,
+                                            CompileUnit *Unit) {
+  if (!Die)
+    return {};
+
+  std::optional<DWARFFormValue> Ref;
+
+  auto GetDieName = [](const DWARFDie &D) -> llvm::StringRef {
+    auto NameForm = D.find(llvm::dwarf::DW_AT_name);
+    if (!NameForm)
+      return {};
+
+    auto NameOrErr = NameForm->getAsCString();
+    if (!NameOrErr) {
+      llvm::consumeError(NameOrErr.takeError());
+      return {};
+    }
+
+    return *NameOrErr;
+  };
+
+  llvm::StringRef Name = GetDieName(Die);
+  if (!Name.empty())
+    return Name;
+
+  while (true) {
+    if (!(Ref = Die.find(llvm::dwarf::DW_AT_specification)) &&
+        !(Ref = Die.find(llvm::dwarf::DW_AT_abstract_origin)))
+      break;
+
+    Die = Linker.resolveDIEReference(File, CompileUnits, *Ref, Die, Unit);
+    if (!Die)
+      break;
+
+    assert(Unit);
+
+    unsigned SpecIdx = Unit->getOrigUnit().getDIEIndex(Die);
+    CompileUnit::DIEInfo &SpecInfo = Unit->getInfo(SpecIdx);
+    if (SpecInfo.Ctxt && SpecInfo.Ctxt->hasCanonicalDIE()) {
+      if (!SpecInfo.Ctxt->getCanonicalName().empty()) {
+        Name = SpecInfo.Ctxt->getCanonicalName();
+        break;
+      }
+    }
+
+    Name = GetDieName(Die);
+    if (!Name.empty())
+      break;
+  }
+
+  return Name;
+}
+
+bool DWARFLinker::DIECloner::getDIENames(
+    const DWARFDie &Die, AttributesInfo &Info, OffsetsStringPool &StringPool,
+    const DWARFFile &File, CompileUnit &Unit, bool StripTemplate) {
   // This function will be called on DIEs having low_pcs and
   // ranges. As getting the name might be more expansive, filter out
   // blocks directly.
   if (Die.getTag() == dwarf::DW_TAG_lexical_block)
     return false;
 
+  // The mangled name of a specification DIE will by virtue of the
+  // uniquing algorithm be the same as the one it got uniqued into.
+  // So just use the input DIE's linkage name.
   if (!Info.MangledName)
     if (const char *MangledName = Die.getLinkageName())
       Info.MangledName = StringPool.getEntry(MangledName);
 
+  // For subprograms with linkage names, we unique on the linkage name,
+  // so DW_AT_name's may differ between the input and canonical DIEs.
+  // Use the name of the canonical DIE.
   if (!Info.Name)
-    if (const char *Name = Die.getShortName())
+    if (llvm::StringRef Name = getCanonicalDIEName(Die, File, &Unit);
+        !Name.empty())
       Info.Name = StringPool.getEntry(Name);
 
   if (!Info.MangledName)
@@ -1939,7 +2001,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
   // accelerator tables too. For now stick with dsymutil's behavior.
   if ((Info.InDebugMap || AttrInfo.HasLowPc || AttrInfo.HasRanges) &&
       Tag != dwarf::DW_TAG_compile_unit &&
-      getDIENames(InputDIE, AttrInfo, DebugStrPool,
+      getDIENames(InputDIE, AttrInfo, DebugStrPool, File, Unit,
                   Tag != dwarf::DW_TAG_inlined_subroutine)) {
     if (AttrInfo.MangledName && AttrInfo.MangledName != AttrInfo.Name)
       Unit.addNameAccelerator(Die, AttrInfo.MangledName,
@@ -1962,7 +2024,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
   } else if (Tag == dwarf::DW_TAG_imported_declaration && AttrInfo.Name) {
     Unit.addNamespaceAccelerator(Die, AttrInfo.Name);
   } else if (isTypeTag(Tag) && !AttrInfo.IsDeclaration) {
-    bool Success = getDIENames(InputDIE, AttrInfo, DebugStrPool);
+    bool Success = getDIENames(InputDIE, AttrInfo, DebugStrPool, File, Unit);
     uint64_t RuntimeLang =
         dwarf::toUnsigned(InputDIE.find(dwarf::DW_AT_APPLE_runtime_class))
             .value_or(0);
diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp
index c9c8dddce9c44..66a1ba9c6711f 100644
--- a/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp
+++ b/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp
@@ -84,24 +84,26 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE,
     break;
   }
 
-  StringRef NameRef;
+  StringRef Name = DIE.getShortName();
+  StringRef NameForUniquing;
   StringRef FileRef;
 
   if (const char *LinkageName = DIE.getLinkageName())
-    NameRef = StringPool.internString(LinkageName);
-  else if (const char *ShortName = DIE.getShortName())
-    NameRef = StringPool.internString(ShortName);
+    NameForUniquing = StringPool.internString(LinkageName);
+  else if (!Name.empty())
+    NameForUniquing = StringPool.internString(Name);
 
-  bool IsAnonymousNamespace = NameRef.empty() && Tag == dwarf::DW_TAG_namespace;
+  bool IsAnonymousNamespace =
+      NameForUniquing.empty() && Tag == dwarf::DW_TAG_namespace;
   if (IsAnonymousNamespace) {
     // FIXME: For dsymutil-classic compatibility. I think uniquing within
     // anonymous namespaces is wrong. There is no ODR guarantee there.
-    NameRef = "(anonymous namespace)";
+    NameForUniquing = "(anonymous namespace)";
   }
 
   if (Tag != dwarf::DW_TAG_class_type && Tag != dwarf::DW_TAG_structure_type &&
       Tag != dwarf::DW_TAG_union_type &&
-      Tag != dwarf::DW_TAG_enumeration_type && NameRef.empty())
+      Tag != dwarf::DW_TAG_enumeration_type && NameForUniquing.empty())
     return PointerIntPair<DeclContext *, 1>(nullptr);
 
   unsigned Line = 0;
@@ -140,10 +142,10 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE,
     }
   }
 
-  if (!Line && NameRef.empty())
+  if (!Line && NameForUniquing.empty())
     return PointerIntPair<DeclContext *, 1>(nullptr);
 
-  // We hash NameRef, which is the mangled name, in order to get most
+  // We hash NameForUniquing, which is the mangled name, in order to get most
   // overloaded functions resolve correctly.
   //
   // Strictly speaking, hashing the Tag is only necessary for a
@@ -153,7 +155,8 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE,
   // FIXME: dsymutil-classic won't unique the same type presented
   // once as a struct and once as a class. Using the Tag in the fully
   // qualified name hash to get the same effect.
-  unsigned Hash = hash_combine(Context.getQualifiedNameHash(), Tag, NameRef);
+  unsigned Hash =
+      hash_combine(Context.getQualifiedNameHash(), Tag, NameForUniquing);
 
   // FIXME: dsymutil-classic compatibility: when we don't have a name,
   // use the filename.
@@ -161,15 +164,16 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE,
     Hash = hash_combine(Hash, FileRef);
 
   // Now look if this context already exists.
-  DeclContext Key(Hash, Line, ByteSize, Tag, NameRef, FileRef, Context);
+  DeclContext Key(Hash, Line, ByteSize, Tag, Name, NameForUniquing, FileRef,
+                  Context);
   auto ContextIter = Contexts.find(&Key);
 
   if (ContextIter == Contexts.end()) {
     // The context wasn't found.
     bool Inserted;
-    DeclContext *NewContext =
-        new (Allocator) DeclContext(Hash, Line, ByteSize, Tag, NameRef, FileRef,
-                                    Context, DIE, U.getUniqueID());
+    DeclContext *NewContext = new (Allocator)
+        DeclContext(Hash, Line, ByteSize, Tag, Name, NameForUniquing, FileRef,
+                    Context, DIE, U.getUniqueID());
     std::tie(ContextIter, Inserted) = Contexts.insert(NewContext);
     assert(Inserted && "Failed to insert DeclContext");
     (void)Inserted;
diff --git a/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map b/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map
index 50d860290422c..bd2b2014ee22c 100644
--- a/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map
+++ b/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map
@@ -11,9 +11,13 @@ objects:
   - filename: 1.o
     symbols:
       - { sym: _bar, objAddr: 0x0, binAddr: 0x10000, size: 0x10 }
+      - { sym: __Z13lib1_internalv, objAddr: 0x0, binAddr: 0x10020, size: 0x20 }
+      - { sym: __ZN3Foo4funcIZ13lib1_internalvE3$_0EEvv, objAddr: 0x0, binAddr: 0x10040, size: 0x20 }
   - filename: 2.o
     symbols:
       - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x20000, size: 0x10 }
+      - { sym: __Z13lib1_internalv, objAddr: 0x0, binAddr: 0x20020, size: 0x20 }
+      - { sym: __ZN3Foo4funcIZ13lib1_internalvE3$_0EEvv, objAddr: 0x0, binAddr: 0x20040, size: 0x20 }
   - filename: 3.o
     symbols:
       - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x30000, size: 0x10 }
diff --git a/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test b/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test
index c0c4fe835682f..c5110a873c603 100644
--- a/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test
+++ b/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test
@@ -98,7 +98,7 @@ CHECK:               DW_AT_str_offsets_base [DW_FORM_sec_offset]       (0x000000
 CHECK:               DW_AT_comp_dir [DW_FORM_strx]     (indexed (00000004) string = "/Users/shubham/Development/test109275485")
 
 CHECK:   DW_TAG_subprogram
-CHECK:                 DW_AT_low_pc [DW_FORM_addrx]    (indexed (00000000) address = 0x0000000000010000)
+CHECK:                 DW_AT_low_pc [DW_FORM_addrx]    (indexed (00000000) address = 0x0000000000010040)
 CHECK:                 DW_AT_linkage_name [DW_FORM_strx]       (indexed (00000005) string = "_Z4foo2i")
 CHECK:                 DW_AT_name [DW_FORM_strx]       (indexed (00000006) string = "foo2")
 
diff --git a/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c b/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c
index d2d36f675e8b7..b89a6f99ebcb3 100644
--- a/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c
+++ b/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c
@@ -10,10 +10,10 @@ int bar(int a) { return foo(a); }
 // RUN: llvm-dwarfdump - | FileCheck %s
 
 // CHECK: DW_TAG_subprogram
-// CHECK: DW_AT_low_pc{{.*}}0x0000000000010000
+// CHECK: DW_AT_low_pc{{.*}}0x0000000000010040
 // CHECK: DW_AT_name{{.*}}"bar"
 // CHECK-NOT: NULL
 // CHECK: DW_TAG_inlined_subroutine
 // CHECK-NEXT: DW_AT_abstract_origin{{.*}}"foo"
-// CHECK-NEXT: DW_AT_low_pc{{.*}}0x0000000000010000
+// CHECK-NEXT: DW_AT_low_pc{{.*}}0x0000000000010040
 
diff --git a/llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test b/llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test
new file mode 100644
index 0000000000000..b6edb8bca3194
--- /dev/null
+++ b/llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test
@@ -0,0 +1,28 @@
+# Tests the case where a DW_TAG_subprogram for a method declaration
+# got uniqued into a DW_TAG_subprogram with the same linkage name (but
+# different DW_AT_name). Make sure the DW_TAG_subprogram DIE for the
+# definition, which previously pointed to the now de-duplicated declaration,
+# gets inserted into the .debug_names table using the DW_AT_name of the canonical
+# declaration DW_TAG_subprogram.
+#
+# Object files compiled as follows:
+#   clang -g -c -o 1.o Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp
+#   clang -g -c -o 2.o Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp
+
+# RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -o - \
+# RUN:     | llvm-dwarfdump --verify - | FileCheck %s
+
+# RUN: dsymutil --linker parallel -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -o - \
+# RUN:     | not llvm-dwarfdump --verify - | FileCheck %s --check-prefix=PARALLEL-ODR
+
+# RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -no-odr -o - \
+# RUN:     | llvm-dwarfdump --verify - | FileCheck %s
+
+# RUN: dsymutil --linker parallel -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -no-odr -o - \
+# RUN:     | llvm-dwarfdump --verify - | FileCheck %s
+
+# CHECK: No errors.
+
+# FIXME: parallel DWARFLinker uses wrong DW_AT_name when inserting uniqued subprogram into .debug_names
+# PARALLEL-ODR:      Verifying .debug_names...
+# PARALLEL-ODR-NEXT: error: Name Index {{.*}} mismatched Name of DIE
diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/1.o b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..5932a3c89aaeb1b87292f3466bd5550f4c685637
GIT binary patch
literal 2416
zcma)7O>Ep$5T0kRH@oZ7G~I|K1c57I(SqXrNt9NhGzr<Xgrqd00m7dwukCGC?ApuP
zy9H9zpIkuV5{Vvqq6i^Ti9-$`B5^23dZ-{5)MGgT2L$SgpQdHTe%>^@MP#IT^Yi9?
zp7&<n^Y?%Mc4rp=10)(^$iIj<D9A_y$V+7Z2YpTpkP=)*b{G1HQ#6UB>%RTAAL^mK
zFul02Wk*_;D}*PYzLwz0#o$~NLsHi*yVy9T8^vPXZjxPidGdv$t&j-k689Y9`LzN<
z_?8*rm7S7H&UU<OJn~&GH+G2U`E}hbmArPmso=x`4spC$i7Q)zw%?*CKg0`s?Rd)^
zuf*lb4)F}j3IN;je&S#6=NbGO50!PS2cNocevVPKJq&HfyUnB5I9?2!V;ex5sq3a&
ztr@1TH=NUD$I|t=0$&=4HQtjG+_{c;<ofP^@~e~`o0hca_Xo#2&+-1G=0&;@^<PJR
zVgC`HPDO6(zqfdQ91LOL9mHv)ZQIY|B%O6wzK8H4A|p`{`ETNSf>e?K^C)Fxl0=j`
zF|F-vOLXpl?ykVb<}$moIR&eiyI=#Ce6+bdc?JEe--`N=cc)<EW*7X0GO$?>Q`NL2
zsu#|vzn@j>eS7)_)t<q<ajy@P))=W1{f;WCWAan-I6fny0Hd-jlZk1j*{Ce4ktt~`
z3Ozhv?Zc19Gcc1DReVGwR7P-pq8k-}WQhIq*@20ED}4+-DG7vrQ4OO2-vW?;1;@=`
zA230Tjy?wTDv%|Bf+MRYQy_{G!&8e%5@Ss2N-!aw!?}3Z2^=w+3YcVjaAdB9<I~6~
zVKpJsV-|*nK`?@j=+=#+{w(rW$k&n6SeriN0pv+!s?{wN2HCY*#a7IU;ha*McHJwx
zj*`!5SuLyV54=P9Tox@Ym&<CS2b9;IEIYnkcZ|xA>%9S$axtfwwHjmysB9WO0(hB4
z)3xlpX4Gn#DHr1%-^rd@Gsc0Ds+G5nHSuX>r|hI!)TqE_RMdty!&W~1r+s5YX_
z@ULo~^(KVNI{ld!J={X3S~iyqyHaeJOPPXu+NrpP<z?!&)i7-<NLi0I0s5=CVcJB?
z(}bh03kx{${4i=GrG{h9bOcQgKc;7=r<+Zn`NR7wMzv@eis38KiHwbm<%bU(0e`9N
zK`)-R6cis7BqbPw;R_t8a8#Fw$R_SVrjD7Ld+B)gV3r!FMNvH6xz{g!EZ#VCXZ*8U
zV13}dFDE~FaF770*i>|C04g{UJ*D{1g}unZo!7*43_Dy%hZ2~IGr)r1V>k-II}-Y7
zu0=|ybzFPy2Wng#w!JmvdJmb7V;&_o{y)1PAc09z(x^Ndte`wZ)VrlegvU{bFMAt9
z&s@AXI{`mjeeU+dSLr<1wP|C0YPfLzm0ia^yk7it9Fs}5KG-WIQ{PBTUWqXYzLwOK
zd_hRy7k1Pr+E#Z5MO_N^gBcFdzEH-gHY61;Q3STZWdY@G42XzSvK8()LN<a#|C1-s
b?q=usEDC+6U5J4leO&RYwH(k$2SxNR3xQzl

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/2.o b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/2.o
new file mode 100644
index 0000000000000000000000000000000000000000..607b47059e98275ce59b169e18252ed9bc957a1b
GIT binary patch
literal 2416
zcma)7O>Ep$5T3VQZ}z7&O*bM5LEuWYXhE@eH&I$bX%ez&2}x;01B5?WUfbKO@Y>7T
zy9EO3Pc9&Fi9`=Q6@-we)I$y+B5^23dZ-{5ggBKGa6mv${4|sq`*}llL1d(P^Yi9?
zp7&<n^Ot}8^w%x`21qo-k$(_ykdToEkeA5*5Bi7}ASJns>@M^Xr)d&N(|qeyKh#5e
zVR~_4%Z{`hR|qeL`dX4F7l(6E3`tEhty1H(rk6@}t4Vh8*~zDlwL&7COWbpq=hq4d
z;ag^eS8>WNIot8B^T^k^+}I(W=hrp2T=v@Wrh*d(ILz^8MXqcK+J1|o{17kjwc{;w
zyfT+7JH*pXGXQMI`;LFTpJ(u6B2?C~9(?Lw^K<m7<zZ+$-fbSe#_{6V9NPfeOieS~
zYE3tMt>K)hIHsn}75UOYtnr?l;Ldf#Bi9%Ilb>C2ELzf@-!B~RJjeTuniuJtsQ)_h
z3;U1oG%9ji|GmQd<4_0#uOUtwZQFhUC+V!i^4)_M5gCbs$bS>p6Qq;^m`9mGCP_r8
z6VuwxwnXO+=*|jkY%a4an^Um*UKecOl6N<kC$FG?^>ab{`py(=-0XtiQ3f{aVX~YR
z1^L1o^3P}Gdf%SDLAhseZ^G-tq!mW$M86{o@>u4P%s4(HLJUSTnGBhjW|oa+1UWKA
zg+-x92F!i<@q`L9SwY4}L_}o-*C)GC0Z4_|-=7_r=r^;+(UTS-)-T9m6yRF`BCz1N
z8SDcV6QZM!1HB4l#X!Q5Ra0pY1d-vX#YK@ZCUzxRERn~#MAu0iF`5dPVta68u8HHL
z$m!T>GDDA97#aq_2s)x$H;($_$e$qJK+a-q`j7{ZCy}XEw@~P0*J`#U8Mf}6mYP=G
ztGJF-$SXM|Cmjg9Lxp?}EhV4NDWeCa7apoOzEyX0d&u=(0=rTwC`PRYDgjjt-A4dV
zT{K+NDkyrbrcSvS@A#ftw3?Rf)~vcZZ|i>9tyewugl?QXwW!p%;~z1ninGylVN@AW
zX82bX&wLreWu1QYX%DxcRx8GmZrP=Vv7{E=Gmh=*rl-~|vtd{!rJUEIO@Q`%et4Uw
z3CCO)7I5OpVbn&-4ab=22$~+=ujQtvn@ynkBL{4~S~7J>_oe7W#zw{p!v~Lozf|#{
z7f)LZijNAC6pX=e6-O!@)g>abiF=T#W9H_bIgvY*qXudb#nZi`JNM$H%fg4Zz<l`I
zUu$oBZ)H;N-#tixQNgJJsNh8Ol;S@Z_96#&UJ<f!?C@ANl)z-10T%oo!!Z`TBcY$>
zS|o*9$F=8vpvJ{v+gn4fH<9T$=22qf|GE1v60{>^RPGN}P#yy6-Qs<*2T+HPdmBTK
zUA#Cu0bgBv;`Y7Q=sei<X?=ZaxOo1#UB}<KQTkvUlS#Ec*ej;epNTB95@#ZOD$421
zg;)~5u%kv%wz``-L|F><gBcFdzECEpHY6o3Q3STmWfA3W42XzSvL)^~N;ZN-|C0yN
b?q=usEDCL>U5J4leQf*HS{`VmgChDH*Q8+8

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp
new file mode 100644
index 0000000000000..4cf90f071ee8c
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp
@@ -0,0 +1,5 @@
+#include "lib1.h"
+
+[[gnu::weak]] void lib1_internal() {
+  Foo{}.func<decltype([]{})>();
+}
diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h
new file mode 100644
index 0000000000000..3b3cefbaeac17
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h
@@ -0,0 +1,3 @@
+struct Foo {
+  template<typename T> void func() {}
+};
diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp
new file mode 100644
index 0000000000000..4cf90f071ee8c
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp
@@ -0,0 +1,5 @@
+#include "lib1.h"
+
+[[gnu::weak]] void lib1_internal() {
+  Foo{}.func<decltype([]{})>();
+}
diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp
new file mode 100644
index 0000000000000..77f2cc4c8affe
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp
@@ -0,0 +1,6 @@
+[[gnu::weak]] void lib1_internal();
+
+int main() {
+    lib1_internal();
+    __builtin_debugtrap();
+}

From e92bb83c1810c61a7fa81d55a1690cffa2b14b60 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko@accesssoftek.com>
Date: Mon, 24 Nov 2025 22:18:51 +0300
Subject: [PATCH 37/37] [AArch64][PAC] Simplify emission of authenticated
 pointer check (NFC) (#160899)

The `AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue` method accepts
two arguments, `bool ShouldTrap` and `const MCSymbol *OnFailure`, that
control the behavior of the emitted instruction sequence when the check
fails:
* `ShouldTrap` requests an error to be generated
* `OnFailure` requests branching to the given label after clearing the
  PAC field

An assertion in `emitPtrauthCheckAuthenticatedValue` ensures that when
`ShouldTrap` is true, `OnFailure` must be null. But the opposite holds
as well: when `ShouldTrap` is false, `OnFailure` is always non-null,
as otherwise the entire sequence following `AUT[ID][AB]` instruction
would turn into a very expensive equivalent of XPAC (unless the CPU
implements FEAT_FPAC):

    authenticate Xn
    inspect PAC field of Xn
    if PAC field was not cleared:
      clear PAC field

In other words, the value of `ShouldTrap` argument can be computed as
`OnFailure == nullptr` at all existing call sites. In fact, at three
of four call sites, constant `true` and `nullptr` are passed as the
values of these function arguments. `emitPtrauthAuthResign` is the
only caller that potentially makes use of the checking-but-not-trapping
mode of `emitPtrauthCheckAuthenticatedValue`, and it passes a non-null
pointer as `OnFailure` when `ShouldTrap` is false.

This commit makes the invariant explicit by omitting the `ShouldTrap`
argument and inferring its value from the `OnFailure` argument instead.
---
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 54 +++++++++----------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 73d9699f71477..5da6181ba36dd 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -162,8 +162,7 @@ class AArch64AsmPrinter : public AsmPrinter {
                                           Register ScratchReg,
                                           AArch64PACKey::ID Key,
                                           AArch64PAuth::AuthCheckMethod Method,
-                                          bool ShouldTrap,
-                                          const MCSymbol *OnFailure);
+                                          const MCSymbol *OnFailure = nullptr);
 
   // Check authenticated LR before tail calling.
   void emitPtrauthTailCallHardening(const MachineInstr *TC);
@@ -1937,14 +1936,19 @@ Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
   return ScratchReg;
 }
 
-/// Emits a code sequence to check an authenticated pointer value.
+/// Emit a code sequence to check an authenticated pointer value.
 ///
-/// If OnFailure argument is passed, jump there on check failure instead
-/// of proceeding to the next instruction (only if ShouldTrap is false).
+/// This function emits a sequence of instructions that checks if TestedReg was
+/// authenticated successfully. On success, execution continues at the next
+/// instruction after the sequence.
+///
+/// The action performed on failure depends on the OnFailure argument:
+/// * if OnFailure is not nullptr, control is transferred to that label after
+///   clearing the PAC field
+/// * otherwise, BRK instruction is emitted to generate an error
 void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue(
     Register TestedReg, Register ScratchReg, AArch64PACKey::ID Key,
-    AArch64PAuth::AuthCheckMethod Method, bool ShouldTrap,
-    const MCSymbol *OnFailure) {
+    AArch64PAuth::AuthCheckMethod Method, const MCSymbol *OnFailure) {
   // Insert a sequence to check if authentication of TestedReg succeeded,
   // such as:
   //
@@ -1981,7 +1985,7 @@ void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue(
                        .addReg(getWRegFromXReg(ScratchReg))
                        .addReg(TestedReg)
                        .addImm(0));
-    assert(ShouldTrap && !OnFailure && "DummyLoad always traps on error");
+    assert(!OnFailure && "DummyLoad always traps on error");
     return;
   }
 
@@ -2035,15 +2039,14 @@ void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue(
     llvm_unreachable("Unsupported check method");
   }
 
-  if (ShouldTrap) {
-    assert(!OnFailure && "Cannot specify OnFailure with ShouldTrap");
+  if (!OnFailure) {
     // Trapping sequences do a 'brk'.
     //  brk #<0xc470 + aut key>
     EmitToStreamer(MCInstBuilder(AArch64::BRK).addImm(0xc470 | Key));
   } else {
     // Non-trapping checked sequences return the stripped result in TestedReg,
-    // skipping over success-only code (such as re-signing the pointer) if
-    // there is one.
+    // skipping over success-only code (such as re-signing the pointer) by
+    // jumping to OnFailure label.
     // Note that this can introduce an authentication oracle (such as based on
     // the high bits of the re-signed value).
 
@@ -2068,12 +2071,9 @@ void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue(
           MCInstBuilder(XPACOpc).addReg(TestedReg).addReg(TestedReg));
     }
 
-    if (OnFailure) {
-      //  b Lend
-      EmitToStreamer(
-          MCInstBuilder(AArch64::B)
-              .addExpr(MCSymbolRefExpr::create(OnFailure, OutContext)));
-    }
+    //  b Lend
+    const auto *OnFailureExpr = MCSymbolRefExpr::create(OnFailure, OutContext);
+    EmitToStreamer(MCInstBuilder(AArch64::B).addExpr(OnFailureExpr));
   }
 
   // If the auth check succeeds, we can continue.
@@ -2100,9 +2100,8 @@ void AArch64AsmPrinter::emitPtrauthTailCallHardening(const MachineInstr *TC) {
          "Neither x16 nor x17 is available as a scratch register");
   AArch64PACKey::ID Key =
       AArch64FI->shouldSignWithBKey() ? AArch64PACKey::IB : AArch64PACKey::IA;
-  emitPtrauthCheckAuthenticatedValue(
-      AArch64::LR, ScratchReg, Key, LRCheckMethod,
-      /*ShouldTrap=*/true, /*OnFailure=*/nullptr);
+  emitPtrauthCheckAuthenticatedValue(AArch64::LR, ScratchReg, Key,
+                                     LRCheckMethod);
 }
 
 void AArch64AsmPrinter::emitPtrauthAuthResign(
@@ -2176,9 +2175,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(
     if (IsAUTPAC && !ShouldTrap)
       EndSym = createTempSymbol("resign_end_");
 
-    emitPtrauthCheckAuthenticatedValue(AUTVal, Scratch, AUTKey,
-                                       AArch64PAuth::AuthCheckMethod::XPAC,
-                                       ShouldTrap, EndSym);
+    emitPtrauthCheckAuthenticatedValue(
+        AUTVal, Scratch, AUTKey, AArch64PAuth::AuthCheckMethod::XPAC, EndSym);
   }
 
   // We already emitted unchecked and checked-but-non-trapping AUTs.
@@ -2517,9 +2515,7 @@ void AArch64AsmPrinter::LowerMOVaddrPAC(const MachineInstr &MI) {
                                                      : AArch64PACKey::DA);
 
         emitPtrauthCheckAuthenticatedValue(AArch64::X16, AArch64::X17, AuthKey,
-                                           AArch64PAuth::AuthCheckMethod::XPAC,
-                                           /*ShouldTrap=*/true,
-                                           /*OnFailure=*/nullptr);
+                                           AArch64PAuth::AuthCheckMethod::XPAC);
       }
     } else {
       EmitToStreamer(MCInstBuilder(AArch64::LDRXui)
@@ -2652,9 +2648,7 @@ void AArch64AsmPrinter::LowerLOADgotAUTH(const MachineInstr &MI) {
         (AuthOpcode == AArch64::AUTIA ? AArch64PACKey::IA : AArch64PACKey::DA);
 
     emitPtrauthCheckAuthenticatedValue(AuthResultReg, AArch64::X17, AuthKey,
-                                       AArch64PAuth::AuthCheckMethod::XPAC,
-                                       /*ShouldTrap=*/true,
-                                       /*OnFailure=*/nullptr);
+                                       AArch64PAuth::AuthCheckMethod::XPAC);
 
     emitMovXReg(DstReg, AuthResultReg);
   }