diff --git a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h index 95216368492ca..d72d021f44838 100644 --- a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h +++ b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h @@ -137,7 +137,7 @@ class ExpandModularHeadersPPCallbacks : public PPCallbacks { std::unique_ptr PP; bool EnteredMainFile = false; bool StartedLexing = false; - Token CurrentToken; + Token CurrentToken = Token(); }; } // namespace tooling diff --git a/clang-tools-extra/clang-tidy/bugprone/FloatLoopCounterCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/FloatLoopCounterCheck.cpp index adf2d2b4bcc07..38a0234337756 100644 --- a/clang-tools-extra/clang-tidy/bugprone/FloatLoopCounterCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/FloatLoopCounterCheck.cpp @@ -31,6 +31,7 @@ void FloatLoopCounterCheck::registerMatchers(MatchFinder *Finder) { void FloatLoopCounterCheck::check(const MatchFinder::MatchResult &Result) { const auto *FS = Result.Nodes.getNodeAs("for"); + assert(FS && "FS should not be null"); diag(FS->getInc()->getBeginLoc(), "loop induction expression should not have " "floating-point type") diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccessCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccessCheck.cpp index 54c4692923949..cf4b445a554e8 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccessCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccessCheck.cpp @@ -176,7 +176,7 @@ void ProBoundsAvoidUncheckedContainerAccessCheck::check( } } else if (const auto *MCE = dyn_cast(MatchedExpr)) { // Case: a.operator[](i) or a->operator[](i) - const auto *Callee = dyn_cast(MCE->getCallee()); + const auto *Callee = cast(MCE->getCallee()); if (FixMode == At) { // Cases: 
a.operator[](i) => a.at(i) and a->operator[](i) => a->at(i) diff --git a/clang/include/clang/AST/ASTConsumer.h b/clang/include/clang/AST/ASTConsumer.h index 447f2592d2359..a1ef187ee2069 100644 --- a/clang/include/clang/AST/ASTConsumer.h +++ b/clang/include/clang/AST/ASTConsumer.h @@ -27,6 +27,7 @@ namespace clang { class VarDecl; class FunctionDecl; class ImportDecl; + class OpenACCRoutineDecl; /// ASTConsumer - This is an abstract interface that should be implemented by /// clients that read ASTs. This abstraction layer allows the client to be @@ -116,6 +117,11 @@ class ASTConsumer { // variable has been instantiated. virtual void HandleCXXStaticMemberVarInstantiation(VarDecl *D) {} + /// Callback to handle the end-of-translation unit attachment of OpenACC + /// routine declaration information. + virtual void HandleOpenACCRoutineReference(const FunctionDecl *FD, + const OpenACCRoutineDecl *RD) {} + /// Callback involved at the end of a translation unit to /// notify the consumer that a vtable for the given C++ class is /// required. 
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 51b7dd16e5195..77531c31538c1 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -783,15 +783,15 @@ let params = T.Unsigned in { } let params = T.Float in { def vminnmq: Intrinsic $a, $b)>; + (fminnm $a, $b)>; def vmaxnmq: Intrinsic $a, $b)>; + (fmaxnm $a, $b)>; def vminnmaq: Intrinsic + (fminnm (IRIntBase<"fabs", [Vector]> $a), (IRIntBase<"fabs", [Vector]> $b))>; def vmaxnmaq: Intrinsic + (fmaxnm (IRIntBase<"fabs", [Vector]> $a), (IRIntBase<"fabs", [Vector]> $b))>; } diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 3714262898476..3210549d0cb56 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -589,6 +589,10 @@ def fsub: strictFPAlt>; def fmul: strictFPAlt>; +def fminnm : strictFPAlt, + IRInt<"vminnm", [Vector]>>; +def fmaxnm : strictFPAlt, + IRInt<"vmaxnm", [Vector]>>; // ----------------------------------------------------------------------------- // Convenience lists of parameter types. 
'T' is just a container record, so you diff --git a/clang/include/clang/CIR/CIRGenerator.h b/clang/include/clang/CIR/CIRGenerator.h index 5ea11463ffa9f..31dead2d7b585 100644 --- a/clang/include/clang/CIR/CIRGenerator.h +++ b/clang/include/clang/CIR/CIRGenerator.h @@ -81,6 +81,9 @@ class CIRGenerator : public clang::ASTConsumer { void HandleTagDeclDefinition(clang::TagDecl *d) override; void HandleTagDeclRequiredDefinition(const clang::TagDecl *D) override; void HandleCXXStaticMemberVarInstantiation(clang::VarDecl *D) override; + void + HandleOpenACCRoutineReference(const clang::FunctionDecl *FD, + const clang::OpenACCRoutineDecl *RD) override; void CompleteTentativeDefinition(clang::VarDecl *d) override; void HandleVTable(clang::CXXRecordDecl *rd) override; diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 4b64fc56c57ad..ae199f35cb10e 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -1173,6 +1173,35 @@ def CIR_SwitchOp : CIR_Op<"switch", [ let hasLLVMLowering = false; } +//===----------------------------------------------------------------------===// +// IsConstantOp +//===----------------------------------------------------------------------===// + +def CIR_IsConstantOp : CIR_Op<"is_constant", [Pure]> { + let summary = "Test for manifest compile-time constant"; + let description = [{ + Returns `true` if the argument is known to be a manifest compile-time + constant otherwise returns `false`. If the argument is a constant expression + which refers to a global (the address of which _is_ a constant, but not + manifest during the compile), then the intrinsic evaluates to `false`. + + This is used to represent `__builtin_constant_p` in cases where the argument + isn't known to be constant during initial translation of the source code but + might be proven to be constant after later optimizations. 
+ + Example: + ``` + %1 = cir.is_constant %2 : !s32i -> !cir.bool + ``` + }]; + let arguments = (ins CIR_AnyType:$val); + let results = (outs CIR_BoolType:$result); + + let assemblyFormat = [{ + $val `:` qualified(type($val)) `->` qualified(type($result)) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // SwitchFlatOp //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 28c609bb8524d..84e3b4d4e59e2 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4870,25 +4870,25 @@ def ggdb3 : Flag<["-"], "ggdb3">, Group; def glldb : Flag<["-"], "glldb">, Group; def gsce : Flag<["-"], "gsce">, Group; def gdbx : Flag<["-"], "gdbx">, Group; -// Equivalent to our default dwarf version. Forces usual dwarf emission when +// Equivalent to our default DWARF version. Forces usual DWARF emission when // CodeView is enabled. 
def gdwarf : Flag<["-"], "gdwarf">, Group, Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, - HelpText<"Generate source-level debug information with the default dwarf version">; + HelpText<"Generate source-level debug information with the default DWARF version">; let Visibility = [ClangOption, FlangOption] in { def gdwarf_2 : Flag<["-"], "gdwarf-2">, Group, - HelpText<"Generate source-level debug information with dwarf version 2">; + HelpText<"Generate source-level debug information with DWARF version 2">; def gdwarf_3 : Flag<["-"], "gdwarf-3">, Group, - HelpText<"Generate source-level debug information with dwarf version 3">; + HelpText<"Generate source-level debug information with DWARF version 3">; def gdwarf_4 : Flag<["-"], "gdwarf-4">, Group, - HelpText<"Generate source-level debug information with dwarf version 4">; + HelpText<"Generate source-level debug information with DWARF version 4">; def gdwarf_5 : Flag<["-"], "gdwarf-5">, Group, - HelpText<"Generate source-level debug information with dwarf version 5">; + HelpText<"Generate source-level debug information with DWARF version 5">; def gdwarf_6 : Flag<["-"], "gdwarf-6">, Group, - HelpText<"Generate source-level debug information with dwarf version 6">; + HelpText<"Generate source-level debug information with DWARF version 6">; } def gdwarf64 : Flag<["-"], "gdwarf64">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption]>, @@ -4915,7 +4915,7 @@ def gno_heterogeneous_dwarf : Flag<["-"], "gno-heterogeneous-dwarf">, HelpText<"Disable DWARF extensions for heterogeneous debugging">, Alias, AliasArgs<["disabled"]>; -def gcodeview : Flag<["-"], "gcodeview">, +def gcodeview : Flag<["-"], "gcodeview">, Group, HelpText<"Generate CodeView debug information">, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption, DXCOption]>, MarshallingInfoFlag>; @@ -4923,17 +4923,20 @@ defm codeview_ghash : BoolOption<"g", "codeview-ghash", CodeGenOpts<"CodeViewGHash">, DefaultFalse, PosFlag, - NegFlag, 
BothFlags<[], [ClangOption, CLOption, DXCOption]>>; + NegFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>, + Group; defm codeview_command_line : BoolOption<"g", "codeview-command-line", CodeGenOpts<"CodeViewCommandLine">, DefaultTrue, PosFlag, NegFlag, - BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option]>>; + BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option]>>, + Group; defm inline_line_tables : BoolGOption<"inline-line-tables", CodeGenOpts<"NoInlineLineTables">, DefaultFalse, NegFlag, - PosFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>; + PosFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>, + Group; def gfull : Flag<["-"], "gfull">, Group; def gused : Flag<["-"], "gused">, Group; @@ -4958,7 +4961,8 @@ defm strict_dwarf : BoolOption<"g", "strict-dwarf", defm omit_unreferenced_methods : BoolGOption<"omit-unreferenced-methods", CodeGenOpts<"DebugOmitUnreferencedMethods">, DefaultFalse, NegFlag, - PosFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>; + PosFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>, + Group; defm column_info : BoolOption<"g", "column-info", CodeGenOpts<"DebugColumnInfo">, DefaultTrue, NegFlag, @@ -5027,6 +5031,7 @@ defm structor_decl_linkage_names "Attach linkage names to C++ constructor/destructor " "declarations in DWARF.">, BothFlags<[], [ClangOption, CLOption, CC1Option]>>, + Group, DocBrief<[{On some ABIs (e.g., Itanium), constructors and destructors may have multiple variants. Historically, when generating DWARF, Clang did not attach ``DW_AT_linkage_name`` to structor DIEs because there were multiple possible manglings (depending on the structor variant) that could be used. With ``-gstructor-decl-linkage-names``, for ABIs with structor variants, we attach a "unified" mangled name to structor declarations DIEs which debuggers can use to look up all the definitions for a structor declaration. 
E.g., a "unified" mangled name ``_ZN3FooC4Ev`` may have multiple definitions associated with it such as ``_ZN3FooC1Ev`` and ``_ZN3FooC2Ev``. Enabling this flag results in a better interactive debugging experience (both GDB and LLDB have support for understanding these "unified" linkage names). However, it comes with a significant increase in debug-info size (particularly the `.debug_str` section). As an escape hatch, users can disable this feature using ``-gno-structor-decl-linkage-names``.}]>; @@ -5035,7 +5040,8 @@ defm key_instructions : BoolGOption<"key-instructions", NegFlag, PosFlag, - BothFlags<[], [ClangOption, CLOption, CC1Option]>>; + BothFlags<[], [ClangOption, CLOption, CC1Option]>>, + Group; def headerpad__max__install__names : Joined<["-"], "headerpad_max_install_names">; def help : Flag<["-", "--"], "help">, Visibility<[ClangOption, CC1Option, CC1AsOption, @@ -8690,7 +8696,7 @@ def main_file_name : Separate<["-"], "main-file-name">, Visibility<[CC1Option, CC1AsOption]>, MarshallingInfoString>; def split_dwarf_output : Separate<["-"], "split-dwarf-output">, - HelpText<"File name to use for split dwarf debug info output">, + HelpText<"File name to use for split DWARF debug info output">, Visibility<[CC1Option, CC1AsOption, FC1Option]>, MarshallingInfoString>; @@ -8724,7 +8730,7 @@ def dependent_lib : Joined<["--"], "dependent-lib=">, MarshallingInfoStringVector>; def split_dwarf_file : Separate<["-"], "split-dwarf-file">, - HelpText<"Name of the split dwarf debug info file to encode in the object file">, + HelpText<"Name of the split DWARF debug info file to encode in the object file">, MarshallingInfoString>; } // let Visibility = [CC1Option, FC1Option] diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index f751e985ae0ff..b5e3ecab36d22 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -37,8 +37,16 @@ class Scope; class SemaOpenACC : public SemaBase { public: 
using DeclGroupPtrTy = OpaquePtr; + using RoutineRefListTy = std::pair; private: + // We save a list of routine clauses that refer to a different function(that + // is, routine-with-a-name) so that we can do the emission at the 'end'. We + // have to do this, since functions can be emitted before they are referenced, + // and the OpenACCRoutineDecl isn't necessarily emitted, as it might be in a + // function/etc. So we do these emits at the end of the TU. + llvm::SmallVector RoutineRefList; + struct ComputeConstructInfo { /// Which type of compute construct we are inside of, which we can use to /// determine whether we should add loops to the above collection. We can @@ -752,6 +760,7 @@ class SemaOpenACC : public SemaBase { }; SemaOpenACC(Sema &S); + void ActOnEndOfTranslationUnit(TranslationUnitDecl *TU); // Called when we encounter a 'while' statement, before looking at its 'body'. void ActOnWhileStmt(SourceLocation WhileLoc); diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index a51d8d2375cfe..1d8f27ab915e2 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -316,8 +316,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { Opts["cl_amd_media_ops"] = true; Opts["cl_amd_media_ops2"] = true; + // FIXME: Check subtarget for image support. 
Opts["__opencl_c_images"] = true; Opts["__opencl_c_3d_image_writes"] = true; + Opts["__opencl_c_read_write_images"] = true; Opts["cl_khr_3d_image_writes"] = true; Opts["__opencl_c_program_scope_global_variables"] = true; Opts["__opencl_c_atomic_order_seq_cst"] = true; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index e14b5f8aac337..12d93cf4c73c6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -542,6 +542,45 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return emitCall(e->getCallee()->getType(), CIRGenCallee::forDirect(fnOp), e, returnValue); } + + case Builtin::BI__builtin_constant_p: { + mlir::Type resultType = convertType(e->getType()); + + const Expr *arg = e->getArg(0); + QualType argType = arg->getType(); + // FIXME: The allowance for Obj-C pointers and block pointers is historical + // and likely a mistake. + if (!argType->isIntegralOrEnumerationType() && !argType->isFloatingType() && + !argType->isObjCObjectPointerType() && !argType->isBlockPointerType()) { + // Per the GCC documentation, only numeric constants are recognized after + // inlining. + return RValue::get( + builder.getConstInt(getLoc(e->getSourceRange()), + mlir::cast(resultType), 0)); + } + + if (arg->HasSideEffects(getContext())) { + // The argument is unevaluated, so be conservative if it might have + // side-effects. 
+ return RValue::get( + builder.getConstInt(getLoc(e->getSourceRange()), + mlir::cast(resultType), 0)); + } + + mlir::Value argValue = emitScalarExpr(arg); + if (argType->isObjCObjectPointerType()) { + cgm.errorNYI(e->getSourceRange(), + "__builtin_constant_p: Obj-C object pointer"); + return {}; + } + argValue = builder.createBitcast(argValue, convertType(argType)); + + mlir::Value result = cir::IsConstantOp::create( + builder, getLoc(e->getSourceRange()), argValue); + // IsConstantOp returns a bool, but __builtin_constant_p returns an int. + result = builder.createBoolToInt(result, resultType); + return RValue::get(result); + } case Builtin::BI__builtin_dynamic_object_size: case Builtin::BI__builtin_object_size: { unsigned type = diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 8d01b7dbd15f6..224a182ed17d1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -100,6 +100,44 @@ static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder, return builder.createBitcast(resVec, ops[0].getType()); } +static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder, + mlir::Location loc, + const std::string &intrinsicName, + SmallVectorImpl &ops) { + unsigned numElems = cast(ops[0].getType()).getWidth(); + + // Convert both operands to mask vectors. + mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems); + mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems); + + mlir::Type i32Ty = builder.getSInt32Ty(); + + // Create indices for extracting the first half of each vector. + SmallVector halfIndices; + for (auto i : llvm::seq(0, numElems / 2)) + halfIndices.push_back(cir::IntAttr::get(i32Ty, i)); + + // Extract first half of each vector. This gives better codegen than + // doing it in a single shuffle. 
+ mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices); + mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices); + + // Create indices for concatenating the vectors. + // NOTE: Operands are swapped to match the intrinsic definition. + // After the half extraction, both vectors have numElems/2 elements. + // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1] + // select from rhsHalf, and indices [numElems/2..numElems-1] select from + // lhsHalf. + SmallVector concatIndices; + for (auto i : llvm::seq(0, numElems)) + concatIndices.push_back(cir::IntAttr::get(i32Ty, i)); + + // Concat the vectors (RHS first, then LHS). + mlir::Value res = + builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices); + return builder.createBitcast(res, ops[0].getType()); +} + static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder, mlir::Location loc, cir::BinOpKind binOpKind, @@ -257,7 +295,15 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1], ops[2]); } - + case X86::BI__builtin_ia32_kunpckhi: + return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kunpackb", ops); + case X86::BI__builtin_ia32_kunpcksi: + return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kunpackw", ops); + case X86::BI__builtin_ia32_kunpckdi: + return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kunpackd", ops); case X86::BI_mm_setcsr: case X86::BI__builtin_ia32_ldmxcsr: { mlir::Location loc = getLoc(expr->getExprLoc()); @@ -947,9 +993,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts); return builder.createBitcast(resVec, ops[0].getType()); } - case X86::BI__builtin_ia32_kunpckdi: - case X86::BI__builtin_ia32_kunpcksi: - case X86::BI__builtin_ia32_kunpckhi: case 
X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index c98d9bb0724f6..ca9fe939139cd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -126,8 +126,7 @@ static void emitMemberInitializer(CIRGenFunction &cgf, lhs.isVolatileQualified()); // Ensure that we destroy the objects if an exception is thrown later in // the constructor. - QualType::DestructionKind dtorKind = fieldType.isDestructedType(); - assert(!cgf.needsEHCleanup(dtorKind) && + assert(!cgf.needsEHCleanup(fieldType.isDestructedType()) && "Arrays of non-record types shouldn't need EH cleanup"); return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp index d52986db49ea6..a5322ac4e1930 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp @@ -287,9 +287,92 @@ void CIRGenModule::emitGlobalOpenACCDeclareDecl(const OpenACCDeclareDecl *d) { } void CIRGenFunction::emitOpenACCRoutine(const OpenACCRoutineDecl &d) { - getCIRGenModule().errorNYI(d.getSourceRange(), "OpenACC Routine Construct"); + // Do nothing here. The OpenACCRoutineDeclAttr handles the implicit name + // cases, and the end-of-TU handling manages the named cases. This is + // necessary because these references aren't necessarily emitted themselves, + // but can be named anywhere. } void CIRGenModule::emitGlobalOpenACCRoutineDecl(const OpenACCRoutineDecl *d) { - errorNYI(d->getSourceRange(), "OpenACC Global Routine Construct"); + // Do nothing here. The OpenACCRoutineDeclAttr handles the implicit name + // cases, and the end-of-TU handling manages the named cases. This is + // necessary because these references aren't necessarily emitted themselves, + // but can be named anywhere. 
+} + +namespace { +class OpenACCRoutineClauseEmitter final + : public OpenACCClauseVisitor { + CIRGen::CIRGenBuilderTy &builder; + mlir::acc::RoutineOp routineOp; + llvm::SmallVector lastDeviceTypeValues; + +public: + OpenACCRoutineClauseEmitter(CIRGen::CIRGenBuilderTy &builder, + mlir::acc::RoutineOp routineOp) + : builder(builder), routineOp(routineOp) {} + + void emitClauses(ArrayRef clauses) { + this->VisitClauseList(clauses); + } + + void VisitClause(const OpenACCClause &clause) { + llvm_unreachable("Invalid OpenACC clause on routine"); + } + + void VisitSeqClause(const OpenACCSeqClause &clause) { + routineOp.addSeq(builder.getContext(), lastDeviceTypeValues); + } + void VisitWorkerClause(const OpenACCWorkerClause &clause) { + routineOp.addWorker(builder.getContext(), lastDeviceTypeValues); + } + void VisitVectorClause(const OpenACCVectorClause &clause) { + routineOp.addVector(builder.getContext(), lastDeviceTypeValues); + } + + void VisitNoHostClause(const OpenACCNoHostClause &clause) { + routineOp.setNohost(/*attrValue=*/true); + } +}; +} // namespace + +void CIRGenModule::emitOpenACCRoutineDecl( + const clang::FunctionDecl *funcDecl, cir::FuncOp func, + SourceLocation pragmaLoc, ArrayRef clauses) { + mlir::OpBuilder::InsertionGuard guardCase(builder); + // These need to appear at the global module. + builder.setInsertionPointToEnd(&getModule().getBodyRegion().front()); + + mlir::Location routineLoc = getLoc(pragmaLoc); + + std::stringstream routineNameSS; + // This follows the same naming format as Flang. + routineNameSS << "acc_routine_" << routineCounter++; + std::string routineName = routineNameSS.str(); + + // There isn't a good constructor for RoutineOp that just takes a location + + // name + function, so we use one that creates an otherwise RoutineOp and + // count on the visitor/emitter to fill these in. 
+ auto routineOp = mlir::acc::RoutineOp::create( + builder, routineLoc, routineName, + mlir::SymbolRefAttr::get(builder.getContext(), func.getName()), + /*implicit=*/false); + + // We have to add a pointer going the other direction via an acc.routine_info, + // from the func to the routine. + llvm::SmallVector funcRoutines; + if (auto routineInfo = + func.getOperation()->getAttrOfType( + mlir::acc::getRoutineInfoAttrName())) + funcRoutines.append(routineInfo.getAccRoutines().begin(), + routineInfo.getAccRoutines().end()); + + funcRoutines.push_back( + mlir::SymbolRefAttr::get(builder.getContext(), routineName)); + func.getOperation()->setAttr( + mlir::acc::getRoutineInfoAttrName(), + mlir::acc::RoutineInfoAttr::get(func.getContext(), funcRoutines)); + + OpenACCRoutineClauseEmitter emitter{builder, routineOp}; + emitter.emitClauses(clauses); } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 03bbfbffce717..1d8e4a3b444ee 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -2227,6 +2227,15 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name, if (!cgf) theModule.push_back(func); + + if (this->getLangOpts().OpenACC) { + // We only have to handle this attribute, since OpenACCAnnotAttrs are + // handled via the end-of-TU work. + for (const auto *attr : + funcDecl->specific_attrs()) + emitOpenACCRoutineDecl(funcDecl, func, attr->getLocation(), + attr->Clauses); + } } return func; } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 6600d086f8f61..d7aee8ebf4d7a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -461,6 +461,12 @@ class CIRGenModule : public CIRGenTypeCache { OpenACCModifierKind modifiers, bool structured, bool implicit, bool requiresDtor); + // Each of the acc.routine operations must have a unique name, so we just use + // an integer counter. 
This is how Flang does it, so it seems reasonable. + unsigned routineCounter = 0; + void emitOpenACCRoutineDecl(const clang::FunctionDecl *funcDecl, + cir::FuncOp func, SourceLocation pragmaLoc, + ArrayRef clauses); // C++ related functions. void emitDeclContext(const DeclContext *dc); diff --git a/clang/lib/CIR/CodeGen/CIRGenerator.cpp b/clang/lib/CIR/CodeGen/CIRGenerator.cpp index aa4d9eba35c04..0208eeea7146a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenerator.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenerator.cpp @@ -166,6 +166,18 @@ void CIRGenerator::HandleCXXStaticMemberVarInstantiation(VarDecl *D) { cgm->handleCXXStaticMemberVarInstantiation(D); } +void CIRGenerator::HandleOpenACCRoutineReference(const FunctionDecl *FD, + const OpenACCRoutineDecl *RD) { + llvm::StringRef mangledName = cgm->getMangledName(FD); + cir::FuncOp entry = + mlir::dyn_cast_if_present(cgm->getGlobalValue(mangledName)); + + // if this wasn't generated, don't force it to be. + if (!entry) + return; + cgm->emitOpenACCRoutineDecl(FD, entry, RD->getBeginLoc(), RD->clauses()); +} + void CIRGenerator::CompleteTentativeDefinition(VarDecl *d) { if (diags.hasErrorOccurred()) return; diff --git a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp index 67bb5657d4001..daec8ae409e0f 100644 --- a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp +++ b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp @@ -88,6 +88,11 @@ class CIRGenConsumer : public clang::ASTConsumer { Gen->HandleCXXStaticMemberVarInstantiation(VD); } + void HandleOpenACCRoutineReference(const FunctionDecl *FD, + const OpenACCRoutineDecl *RD) override { + Gen->HandleOpenACCRoutineReference(FD, RD); + } + void HandleInlineFunctionDefinition(FunctionDecl *D) override { Gen->HandleInlineFunctionDefinition(D); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 8e9780754f68f..40e14474890dc 100644 --- 
a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -3979,6 +3979,13 @@ mlir::LogicalResult CIRToLLVMGetBitfieldOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMIsConstantOpLowering::matchAndRewrite( + cir::IsConstantOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + rewriter.replaceOpWithNewOp(op, adaptor.getVal()); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMInlineAsmOpLowering::matchAndRewrite( cir::InlineAsmOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 423be1826aa47..c2ee8d0fd1748 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6356,8 +6356,15 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { LValue CodeGenFunction::EmitHLSLArrayAssignLValue(const BinaryOperator *E) { // Don't emit an LValue for the RHS because it might not be an LValue LValue LHS = EmitLValue(E->getLHS()); + + // If the RHS is a global resource array, copy all individual resources + // into LHS. + if (E->getRHS()->getType()->isHLSLResourceRecordArray()) + if (CGM.getHLSLRuntime().emitResourceArrayCopy(LHS, E->getRHS(), *this)) + return LHS; + // In C the RHS of an assignment operator is an RValue. - // EmitAggregateAssign takes anan LValue for the RHS. Instead we can call + // EmitAggregateAssign takes an LValue for the RHS. Instead we can call // EmitInitializationToLValue to emit an RValue into an LValue. 
EmitInitializationToLValue(E->getRHS(), LHS); return LHS; diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 0c3701eb01679..f485fdd49e43f 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -22,6 +22,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Attrs.inc" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/HLSLResource.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" @@ -94,6 +95,14 @@ void addRootSignatureMD(llvm::dxbc::RootSignatureVersion RootSigVer, RootSignatureValMD->addOperand(MDVals); } +// Find array variable declaration from DeclRef expression +static const ValueDecl *getArrayDecl(const Expr *E) { + if (const DeclRefExpr *DRE = + dyn_cast_or_null(E->IgnoreImpCasts())) + return DRE->getDecl(); + return nullptr; +} + // Find array variable declaration from nested array subscript AST nodes static const ValueDecl *getArrayDecl(const ArraySubscriptExpr *ASE) { const Expr *E = nullptr; @@ -103,9 +112,7 @@ static const ValueDecl *getArrayDecl(const ArraySubscriptExpr *ASE) { return nullptr; ASE = dyn_cast(E); } - if (const DeclRefExpr *DRE = dyn_cast_or_null(E)) - return DRE->getDecl(); - return nullptr; + return getArrayDecl(E); } // Get the total size of the array, or -1 if the array is unbounded. @@ -1214,12 +1221,13 @@ std::optional CGHLSLRuntime::emitResourceArraySubscriptExpr( ArraySubsExpr->getType()->isHLSLResourceRecordArray()) && "expected resource array subscript expression"); - // Let clang codegen handle local resource array subscripts, + // Let clang codegen handle local and static resource array subscripts, // or when the subscript references on opaque expression (as part of // ArrayInitLoopExpr AST node). 
const VarDecl *ArrayDecl = dyn_cast_or_null(getArrayDecl(ArraySubsExpr)); - if (!ArrayDecl || !ArrayDecl->hasGlobalStorage()) + if (!ArrayDecl || !ArrayDecl->hasGlobalStorage() || + ArrayDecl->getStorageClass() == SC_Static) return std::nullopt; // get the resource array type @@ -1249,7 +1257,7 @@ std::optional CGHLSLRuntime::emitResourceArraySubscriptExpr( // Find binding info for the resource array. For implicit binding // an HLSLResourceBindingAttr should have been added by SemaHLSL. ResourceBindingAttrs Binding(ArrayDecl); - assert((Binding.hasBinding()) && + assert(Binding.hasBinding() && "resource array must have a binding attribute"); // Find the individual resource type. @@ -1305,6 +1313,49 @@ std::optional CGHLSLRuntime::emitResourceArraySubscriptExpr( return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl); } +// If RHSExpr is a global resource array, initialize all of its resources and +// set them into LHS. Returns false if no copy has been performed and the +// array copy should be handled by Clang codegen. +bool CGHLSLRuntime::emitResourceArrayCopy(LValue &LHS, Expr *RHSExpr, + CodeGenFunction &CGF) { + QualType ResultTy = RHSExpr->getType(); + assert(ResultTy->isHLSLResourceRecordArray() && "expected resource array"); + + // Let Clang codegen handle local and static resource array copies. + const VarDecl *ArrayDecl = dyn_cast_or_null(getArrayDecl(RHSExpr)); + if (!ArrayDecl || !ArrayDecl->hasGlobalStorage() || + ArrayDecl->getStorageClass() == SC_Static) + return false; + + // Find binding info for the resource array. For implicit binding + // the HLSLResourceBindingAttr should have been added by SemaHLSL. + ResourceBindingAttrs Binding(ArrayDecl); + assert(Binding.hasBinding() && + "resource array must have a binding attribute"); + + // Find the individual resource type. 
+ ASTContext &AST = ArrayDecl->getASTContext(); + QualType ResTy = AST.getBaseElementType(ResultTy); + const auto *ResArrayTy = cast(ResultTy.getTypePtr()); + + // Use the provided LHS for the result. + AggValueSlot ValueSlot = AggValueSlot::forAddr( + LHS.getAddress(), Qualifiers(), AggValueSlot::IsDestructed_t(true), + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsAliased_t(false), + AggValueSlot::DoesNotOverlap); + + // Create Value for index and total array size (= range size). + int Size = getTotalArraySize(AST, ResArrayTy); + llvm::Value *Zero = llvm::ConstantInt::get(CGM.IntTy, 0); + llvm::Value *Range = llvm::ConstantInt::get(CGM.IntTy, Size); + + // Initialize individual resources in the array into LHS. + std::optional EndIndex = initializeLocalResourceArray( + CGF, ResTy->getAsCXXRecordDecl(), ResArrayTy, ValueSlot, Range, Zero, + ArrayDecl->getName(), Binding, {Zero}, RHSExpr->getExprLoc()); + return EndIndex.has_value(); +} + std::optional CGHLSLRuntime::emitBufferArraySubscriptExpr( const ArraySubscriptExpr *E, CodeGenFunction &CGF, llvm::function_ref EmitIdxAfterBase) { diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 77f43e8766745..c7cd668419d10 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -258,6 +258,7 @@ class CGHLSLRuntime { std::optional emitResourceArraySubscriptExpr(const ArraySubscriptExpr *E, CodeGenFunction &CGF); + bool emitResourceArrayCopy(LValue &LHS, Expr *RHSExpr, CodeGenFunction &CGF); std::optional emitBufferArraySubscriptExpr( const ArraySubscriptExpr *E, CodeGenFunction &CGF, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index eda65739771cf..8d14ab7d6dccd 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5983,7 +5983,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, (D->getType()->isHLSLResourceRecord() || 
D->getType()->isHLSLResourceRecordArray())) { Init = llvm::PoisonValue::get(getTypes().ConvertType(ASTTy)); - NeedsGlobalCtor = D->getType()->isHLSLResourceRecord(); + NeedsGlobalCtor = D->getType()->isHLSLResourceRecord() || + D->getStorageClass() == SC_Static; } else if (D->hasAttr()) { Init = llvm::UndefValue::get(getTypes().ConvertTypeForMem(ASTTy)); } else if (!InitExpr) { diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 325f62cf33444..71e74613973c5 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1502,6 +1502,9 @@ void Sema::ActOnEndOfTranslationUnit() { if (LangOpts.HLSL) HLSL().ActOnEndOfTranslationUnit(getASTContext().getTranslationUnitDecl()); + if (LangOpts.OpenACC) + OpenACC().ActOnEndOfTranslationUnit( + getASTContext().getTranslationUnitDecl()); // If there were errors, disable 'unused' warnings since they will mostly be // noise. Don't warn for a use from a module: either we should warn on all diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 89645e3b67db3..82b964c478dc7 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -4018,7 +4018,9 @@ void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) { // process explicit bindings processExplicitBindingsOnDecl(VD); - if (VD->getType()->isHLSLResourceRecordArray()) { + // Add implicit binding attribute to non-static resource arrays. + if (VD->getType()->isHLSLResourceRecordArray() && + VD->getStorageClass() != SC_Static) { // If the resource array does not have an explicit binding attribute, // create an implicit one. It will be used to transfer implicit binding // order_ID to codegen. @@ -4212,8 +4214,8 @@ bool SemaHLSL::ActOnUninitializedVarDecl(VarDecl *VD) { if (VD->getType().getAddressSpace() == LangAS::hlsl_constant) return true; - // Initialize resources at the global scope - if (VD->hasGlobalStorage()) { + // Initialize non-static resources at the global scope. 
+ if (VD->hasGlobalStorage() && VD->getStorageClass() != SC_Static) { const Type *Ty = VD->getType().getTypePtr(); if (Ty->isHLSLResourceRecord()) return initGlobalResourceDecl(VD); @@ -4237,10 +4239,10 @@ bool SemaHLSL::CheckResourceBinOp(BinaryOperatorKind Opc, Expr *LHSExpr, while (auto *ASE = dyn_cast(E)) E = ASE->getBase()->IgnoreParenImpCasts(); - // Report error if LHS is a resource declared at a global scope. + // Report error if LHS is a non-static resource declared at a global scope. if (DeclRefExpr *DRE = dyn_cast(E->IgnoreParens())) { if (VarDecl *VD = dyn_cast(DRE->getDecl())) { - if (VD->hasGlobalStorage()) { + if (VD->hasGlobalStorage() && VD->getStorageClass() != SC_Static) { // assignment to global resource is not allowed SemaRef.Diag(Loc, diag::err_hlsl_assign_to_global_resource) << VD; SemaRef.Diag(VD->getLocation(), diag::note_var_declared_here) << VD; diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index f0f3832e160cd..1115efbb8305c 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/Sema/SemaOpenACC.h" +#include "clang/AST/ASTConsumer.h" #include "clang/AST/DeclOpenACC.h" #include "clang/AST/StmtOpenACC.h" #include "clang/Basic/DiagnosticSema.h" @@ -2457,7 +2458,8 @@ OpenACCRoutineDecl *SemaOpenACC::CheckRoutineDecl( ArrayRef Clauses, SourceLocation EndLoc) { assert(LParenLoc.isValid()); - if (FunctionDecl *FD = getFunctionFromRoutineName(FuncRef)) { + FunctionDecl *FD = nullptr; + if ((FD = getFunctionFromRoutineName(FuncRef))) { // OpenACC 3.3 2.15: // In C and C++, function static variables are not supported in functions to // which a routine directive applies. 
@@ -2509,11 +2511,9 @@ OpenACCRoutineDecl *SemaOpenACC::CheckRoutineDecl( {DirLoc, BindLoc}); FD->addAttr(RAA); // In case we are referencing not the 'latest' version, make sure we add - // the attribute to all declarations. - while (FD != FD->getMostRecentDecl()) { - FD = FD->getMostRecentDecl(); - FD->addAttr(RAA); - } + // the attribute to all declarations after the 'found' one. + for (auto *CurFD : FD->redecls()) + CurFD->addAttr(RAA->clone(getASTContext())); } LastRoutineDecl = OpenACCRoutineDecl::Create( @@ -2522,9 +2522,20 @@ OpenACCRoutineDecl *SemaOpenACC::CheckRoutineDecl( LastRoutineDecl->setAccess(AS_public); getCurContext()->addDecl(LastRoutineDecl); + if (FD) { + // Add this attribute to the list of annotations so that codegen can visit + // it later. FD doesn't necessarily exist, but that case should be + // diagnosed. + RoutineRefList.emplace_back(FD, LastRoutineDecl); + } return LastRoutineDecl; } +void SemaOpenACC::ActOnEndOfTranslationUnit(TranslationUnitDecl *TU) { + for (auto [FD, RoutineDecl] : RoutineRefList) + SemaRef.Consumer.HandleOpenACCRoutineReference(FD, RoutineDecl); +} + DeclGroupRef SemaOpenACC::ActOnEndRoutineDeclDirective( SourceLocation StartLoc, SourceLocation DirLoc, SourceLocation LParenLoc, Expr *ReferencedFunc, SourceLocation RParenLoc, diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c index 4863ba0bd8848..774e1452d10fa 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c @@ -465,3 +465,57 @@ __mmask64 test_kmov_q(__mmask64 A) { return __builtin_ia32_kmovq(A); } + +__mmask32 test_mm512_kunpackw(__mmask32 A, __mmask32 B) { + // CIR-LABEL: _mm512_kunpackw + // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int> + // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int> + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // 
CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int> -> !u32i + + // LLVM-LABEL: _mm512_kunpackw + // LLVM: [[A_VEC:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: [[B_VEC:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: [[A_HALF:%.*]] = shufflevector <32 x i1> [[A_VEC]], <32 x i1> [[A_VEC]], <16 x i32> + // LLVM: [[B_HALF:%.*]] = shufflevector <32 x i1> [[B_VEC]], <32 x i1> [[B_VEC]], <16 x i32> + // LLVM: [[RES:%.*]] = shufflevector <16 x i1> [[B_HALF]], <16 x i1> [[A_HALF]], <32 x i32> + // LLVM: bitcast <32 x i1> [[RES]] to i32 + + // OGCG-LABEL: _mm512_kunpackw + // OGCG: [[A_VEC:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // OGCG: [[B_VEC:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // OGCG: [[A_HALF:%.*]] = shufflevector <32 x i1> [[A_VEC]], <32 x i1> [[A_VEC]], <16 x i32> + // OGCG: [[B_HALF:%.*]] = shufflevector <32 x i1> [[B_VEC]], <32 x i1> [[B_VEC]], <16 x i32> + // OGCG: [[RES:%.*]] = shufflevector <16 x i1> [[B_HALF]], <16 x i1> [[A_HALF]], <32 x i32> + // OGCG: bitcast <32 x i1> [[RES]] to i32 + return _mm512_kunpackw(A, B); +} + +__mmask64 test_mm512_kunpackd(__mmask64 A, __mmask64 B) { + // CIR-LABEL: _mm512_kunpackd + // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int> + // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int> + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int> -> !u64i + + // LLVM-LABEL: _mm512_kunpackd + // LLVM: [[A_VEC:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // LLVM: [[B_VEC:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // LLVM: [[A_HALF:%.*]] = shufflevector <64 x i1> [[A_VEC]], <64 x i1> [[A_VEC]], <32 x i32> + // LLVM: [[B_HALF:%.*]] = shufflevector <64 x i1> [[B_VEC]], <64 x i1> [[B_VEC]], <32 x i32> + // LLVM: [[RES:%.*]] = shufflevector <32 x i1> [[B_HALF]], <32 x i1> [[A_HALF]], <64 x i32> + // LLVM: bitcast <64 x i1> [[RES]] to i64 + + // OGCG-LABEL: _mm512_kunpackd + // OGCG: [[A_VEC:%.*]] = 
bitcast i64 %{{.*}} to <64 x i1> + // OGCG: [[B_VEC:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // OGCG: [[A_HALF:%.*]] = shufflevector <64 x i1> [[A_VEC]], <64 x i1> [[A_VEC]], <32 x i32> + // OGCG: [[B_HALF:%.*]] = shufflevector <64 x i1> [[B_VEC]], <64 x i1> [[B_VEC]], <32 x i32> + // OGCG: [[RES:%.*]] = shufflevector <32 x i1> [[B_HALF]], <32 x i1> [[A_HALF]], <64 x i32> + // OGCG: bitcast <64 x i1> [[RES]] to i64 + return _mm512_kunpackd(A, B); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c index 9d957f5de554d..e03109510a931 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c @@ -228,6 +228,33 @@ __mmask16 test_kmov_w(__mmask16 A) { // OGCG: bitcast <16 x i1> {{.*}} to i16 return __builtin_ia32_kmovw(A); } + +__mmask16 test_mm512_kunpackb(__mmask16 A, __mmask16 B) { + // CIR-LABEL: _mm512_kunpackb + // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int> + // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int> + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int> -> !u16i + + // LLVM-LABEL: _mm512_kunpackb + // LLVM: [[A_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // LLVM: [[B_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // LLVM: [[A_HALF:%.*]] = shufflevector <16 x i1> [[A_VEC]], <16 x i1> [[A_VEC]], <8 x i32> + // LLVM: [[B_HALF:%.*]] = shufflevector <16 x i1> [[B_VEC]], <16 x i1> [[B_VEC]], <8 x i32> + // LLVM: [[RES:%.*]] = shufflevector <8 x i1> [[B_HALF]], <8 x i1> [[A_HALF]], <16 x i32> + // LLVM: bitcast <16 x i1> [[RES]] to i16 + + // OGCG-LABEL: _mm512_kunpackb + // OGCG: [[A_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // OGCG: [[B_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // OGCG: [[A_HALF:%.*]] = shufflevector <16 x i1> [[A_VEC]], <16 x i1> [[A_VEC]], <8 x i32> + // OGCG: 
[[B_HALF:%.*]] = shufflevector <16 x i1> [[B_VEC]], <16 x i1> [[B_VEC]], <8 x i32> + // OGCG: [[RES:%.*]] = shufflevector <8 x i1> [[B_HALF]], <8 x i1> [[A_HALF]], <16 x i32> + // OGCG: bitcast <16 x i1> [[RES]] to i16 + return _mm512_kunpackb(A, B); +} __m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) { // CIR-LABEL: test_mm512_i64gather_ps // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qps.512" diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-constant-p.c b/clang/test/CIR/CodeGenBuiltins/builtin-constant-p.c new file mode 100644 index 0000000000000..d684659216cba --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/builtin-constant-p.c @@ -0,0 +1,281 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +int a = 42; + +/* --- Compound literals */ + +struct foo { int x, y; }; + +int y; +struct foo f = (struct foo){ __builtin_constant_p(y), 42 }; + +// CIR: cir.global external @f = #cir.const_record<{#cir.int<0> : !s32i, #cir.int<42> : !s32i}> : !rec_foo +// LLVM: @f = global %struct.foo { i32 0, i32 42 } +// OGCG: @f = global %struct.foo { i32 0, i32 42 } + +struct foo test0(int expr) { + struct foo f = (struct foo){ __builtin_constant_p(expr), 42 }; + return f; +} + +// CIR: cir.func {{.*}} @test0(%[[ARG0:.*]]: !s32i {{.*}}) -> !rec_foo +// CIR: %[[EXPR_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["expr", init] +// CIR: cir.store %[[ARG0]], %[[EXPR_ADDR]] +// CIR: %[[EXPR:.*]] = cir.load{{.*}} %[[EXPR_ADDR]] +// CIR: %[[IS_CONSTANT:.*]] = cir.is_constant %[[EXPR]] : !s32i -> !cir.bool + +// LLVM: define{{.*}} %struct.foo @test0(i32 %[[ARG0:.*]]) +// LLVM: 
%[[EXPR_ADDR:.*]] = alloca i32 +// LLVM: store i32 %[[ARG0]], ptr %[[EXPR_ADDR]] +// LLVM: %[[EXPR:.*]] = load i32, ptr %[[EXPR_ADDR]] +// LLVM: %[[IS_CONSTANT:.*]] = call i1 @llvm.is.constant.i32(i32 %[[EXPR]]) + +// OGCG: define{{.*}} i64 @test0(i32 {{.*}} %[[ARG0:.*]]) +// OGCG: %[[EXPR_ADDR:.*]] = alloca i32 +// OGCG: store i32 %[[ARG0]], ptr %[[EXPR_ADDR]] +// OGCG: %[[EXPR:.*]] = load i32, ptr %[[EXPR_ADDR]] +// OGCG: %[[IS_CONSTANT:.*]] = call i1 @llvm.is.constant.i32(i32 %[[EXPR]]) + +/* --- Pointer types */ + +int test1(void) { + return __builtin_constant_p(&a - 13); +} + +// CIR: cir.func {{.*}} @test1() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test1() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test1() +// OGCG: ret i32 0 + +/* --- Aggregate types */ + +int b[] = {1, 2, 3}; + +int test2(void) { + return __builtin_constant_p(b); +} + +// CIR: cir.func {{.*}} @test2() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test2() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test2() +// OGCG: ret i32 0 + +const char test3_c[] = {1, 2, 3, 0}; + +int test3(void) { + return __builtin_constant_p(test3_c); +} + +// CIR: cir.func {{.*}} @test3() -> !s32i +// CIR: 
%[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test3() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test3() +// OGCG: ret i32 0 + +inline char test4_i(const char *x) { + return x[1]; +} + +int test4(void) { + return __builtin_constant_p(test4_i(test3_c)); +} + +// CIR: cir.func {{.*}} @test4() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test4() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test4() +// OGCG: ret i32 0 + +/* --- Constant global variables */ + +const int c = 42; + +int test5(void) { + return __builtin_constant_p(c); +} + +// CIR: cir.func {{.*}} @test5() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store %[[ONE]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test5() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 1, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test5() +// OGCG: ret i32 1 + +/* --- Array types */ + +int arr[] = { 1, 2, 3 }; + +int test6(void) { + return 
__builtin_constant_p(arr[2]); +} + +// CIR: cir.func {{.*}} @test6() -> !s32i +// CIR: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i +// CIR: %[[ARR:.*]] = cir.get_global @arr : !cir.ptr> +// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr> -> !cir.ptr +// CIR: %[[ELE_PTR:.*]] = cir.ptr_stride %[[ARR_PTR]], %[[TWO]] : (!cir.ptr, !s32i) -> !cir.ptr +// CIR: %[[ELE:.*]] = cir.load{{.*}} %[[ELE_PTR]] : !cir.ptr, !s32i +// CIR: %[[IS_CONSTANT:.*]] = cir.is_constant %[[ELE]] : !s32i -> !cir.bool + +// LLVM: define {{.*}} i32 @test6() +// LLVM: %[[TMP1:.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @arr, i64 8) +// LLVM: %[[TMP2:.*]] = call i1 @llvm.is.constant.i32(i32 %[[TMP1]]) + +// OGCG: define {{.*}} i32 @test6() +// OGCG: %[[TMP1:.*]] = load i32, ptr getelementptr inbounds ([3 x i32], ptr @arr, i64 0, i64 2) +// OGCG: %[[TMP2:.*]] = call i1 @llvm.is.constant.i32(i32 %[[TMP1]]) + +const int c_arr[] = { 1, 2, 3 }; + +int test7(void) { + return __builtin_constant_p(c_arr[2]); +} + +// CIR: cir.func {{.*}} @test7() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store %[[ONE]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test7() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 1, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test7() +// OGCG: ret i32 1 + +int test8(void) { + return __builtin_constant_p(c_arr); +} + +// CIR: cir.func {{.*}} @test8() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 
@test8() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test8() +// OGCG: ret i32 0 + +/* --- Function pointers */ + +int test9(void) { + return __builtin_constant_p(&test9); +} + +// CIR: cir.func {{.*}} @test9() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test9() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test9() +// OGCG: ret i32 0 + +int test10(void) { + return __builtin_constant_p(&test10 != 0); +} + +// CIR: cir.func {{.*}} @test10() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store %[[ONE]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test10() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 1, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test10() +// OGCG: ret i32 1 + +int test11_f(void); +void test11(void) { + int a, b; + (void)__builtin_constant_p((a = b, test11_f())); +} + +// CIR: cir.func {{.*}} @test11() +// CIR-NOT: call {{.*}}test11_f + +// LLVM: define{{.*}} void @test11() +// LLVM-NOT: call {{.*}}test11_f + +// OGCG: define{{.*}} void @test11() +// OGCG-NOT: call {{.*}}test11_f diff --git a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented-global.cpp b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented-global.cpp deleted 
file mode 100644 index a5e4694c6f5e6..0000000000000 --- a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented-global.cpp +++ /dev/null @@ -1,6 +0,0 @@ -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-cir %s -o %t.cir -verify -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-llvm %s -o %t-cir.ll -verify - -void foo() {} -// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Global Routine Construct}} -#pragma acc routine(foo) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-anon-ns.cpp b/clang/test/CIR/CodeGenOpenACC/routine-anon-ns.cpp new file mode 100644 index 0000000000000..7c0a2edee5257 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-anon-ns.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +namespace { +#pragma acc routine seq + void NSFunc1(){} +#pragma acc routine seq + auto Lambda1 = [](){}; + + auto Lambda2 = [](){}; +} // namespace + +#pragma acc routine(NSFunc1) seq +#pragma acc routine(Lambda2) seq +void force_emit() { + NSFunc1(); + Lambda1(); + Lambda2(); +} + +// CHECK: cir.func{{.*}} @[[F1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F1_R_NAME:.*]], @[[F1_R2_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]]]>} +// +// CHECK: acc.routine @[[F1_R_NAME]] func(@[[F1_NAME]]) seq +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[F1_R2_NAME]] func(@[[F1_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp b/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp new file 
mode 100644 index 0000000000000..81437e7e02ab1 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +#pragma acc routine seq nohost +void Func1() {} + +void Func2() {} +#pragma acc routine(Func2) seq + +#pragma acc routine worker +void Func3() {} + +void Func4() {} +#pragma acc routine(Func4) worker nohost + +#pragma acc routine nohost vector +void Func5() {} + +void Func6() {} +#pragma acc routine(Func6) nohost vector + +// CHECK: cir.func{{.*}} @[[F1_NAME:.*Func1[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F1_R_NAME:.*]]]>} +// CHECK: acc.routine @[[F1_R_NAME]] func(@[[F1_NAME]]) seq nohost + +// CHECK: cir.func{{.*}} @[[F2_NAME:.*Func2[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F2_R_NAME:.*]]]>} + +// CHECK: cir.func{{.*}} @[[F3_NAME:.*Func3[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F3_R_NAME:.*]]]>} +// CHECK: acc.routine @[[F3_R_NAME]] func(@[[F3_NAME]]) worker + +// CHECK: cir.func{{.*}} @[[F4_NAME:.*Func4[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F4_R_NAME:.*]]]>} + +// CHECK: cir.func{{.*}} @[[F5_NAME:.*Func5[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F5_R_NAME:.*]]]>} +// CHECK: acc.routine @[[F5_R_NAME]] func(@[[F5_NAME]]) vector + +// CHECK: cir.func{{.*}} @[[F6_NAME:.*Func6[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F6_R_NAME:.*]]]>} + +// CHECK: acc.routine @[[F2_R_NAME]] func(@[[F2_NAME]]) seq +// CHECK: acc.routine @[[F4_R_NAME]] func(@[[F4_NAME]]) worker nohost +// CHECK: acc.routine @[[F6_R_NAME]] func(@[[F6_NAME]]) vector nohost diff --git a/clang/test/CIR/CodeGenOpenACC/routine-globals.cpp b/clang/test/CIR/CodeGenOpenACC/routine-globals.cpp new file mode 100644 index 0000000000000..5f125bbce6cb8 --- /dev/null 
+++ b/clang/test/CIR/CodeGenOpenACC/routine-globals.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +#pragma acc routine seq +auto Lambda1 = [](){}; + +auto Lambda2 = [](){}; +#pragma acc routine(Lambda2) seq +#pragma acc routine(Lambda2) seq + +#pragma acc routine seq +int GlobalFunc1(); + +int GlobalFunc2(); +#pragma acc routine(GlobalFunc2) seq +#pragma acc routine(GlobalFunc1) seq + +void force_emit() { + Lambda1(); + Lambda2(); + GlobalFunc1(); + GlobalFunc2(); +} + +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]], @[[L2_R2_NAME:.*]]]>} +// +// CHECK: cir.func{{.*}} @[[G1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G1_R_NAME:.*]], @[[G1_R2_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[G2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G2_R_NAME:.*]]]>} + +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[G1_R_NAME]] func(@[[G1_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq +// CHECK: acc.routine @[[L2_R2_NAME]] func(@[[L2_NAME]]) seq +// CHECK: acc.routine @[[G2_R_NAME]] func(@[[G2_NAME]]) seq +// CHECK: acc.routine @[[G1_R2_NAME]] func(@[[G1_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-globals2.cpp b/clang/test/CIR/CodeGenOpenACC/routine-globals2.cpp new file mode 100644 index 0000000000000..e1aa5046684da --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-globals2.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +#pragma acc routine seq +void GlobalFunc4(); +#pragma acc routine(GlobalFunc4) seq + +#pragma 
acc routine seq +#pragma acc routine seq +void GlobalFunc5(); +#pragma acc routine(GlobalFunc5) seq +#pragma acc routine(GlobalFunc5) seq + +void GlobalFunc6(); +void GlobalFunc6(); +#pragma acc routine(GlobalFunc6) seq +void GlobalFunc6(){} + +void GlobalFunc7(){} +#pragma acc routine(GlobalFunc7) seq + +void force_emit() { + GlobalFunc4(); + GlobalFunc5(); + GlobalFunc6(); + GlobalFunc7(); +} + +// CHECK: cir.func{{.*}} @[[G6_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G6_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[G7_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G7_R_NAME:.*]]]>} + +// CHECK: cir.func{{.*}} @[[G4_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G4_R_NAME:.*]], @[[G4_R2_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[G5_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G5_R_NAME:.*]], @[[G5_R1_NAME:.*]], @[[G5_R2_NAME:.*]], @[[G5_R3_NAME:.*]]]>} + +// CHECK: acc.routine @[[G4_R_NAME]] func(@[[G4_NAME]]) seq +// CHECK: acc.routine @[[G5_R_NAME]] func(@[[G5_NAME]]) seq +// CHECK: acc.routine @[[G5_R1_NAME]] func(@[[G5_NAME]]) seq +// +// CHECK: acc.routine @[[G4_R2_NAME]] func(@[[G4_NAME]]) seq +// +// CHECK: acc.routine @[[G5_R2_NAME]] func(@[[G5_NAME]]) seq +// CHECK: acc.routine @[[G5_R3_NAME]] func(@[[G5_NAME]]) seq +// +// CHECK: acc.routine @[[G6_R_NAME]] func(@[[G6_NAME]]) seq +// CHECK: acc.routine @[[G7_R_NAME]] func(@[[G7_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-locals.cpp b/clang/test/CIR/CodeGenOpenACC/routine-locals.cpp new file mode 100644 index 0000000000000..d338a9cea0d09 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-locals.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +void GlobalFunc(); +void InFunc() { + +#pragma acc routine(GlobalFunc) seq + GlobalFunc(); + +#pragma acc routine 
seq + auto Lambda1 = [](){}; + Lambda1(); + + auto Lambda2 = [](){}; +#pragma acc routine(Lambda2) seq + Lambda2(); +}; + +// CHECK: cir.func{{.*}} @[[G1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]]]>} + +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[G1_R_NAME]] func(@[[G1_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-members.cpp b/clang/test/CIR/CodeGenOpenACC/routine-members.cpp new file mode 100644 index 0000000000000..713500cfe3868 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-members.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct S { +#pragma acc routine seq + void MemFunc1(); + void MemFunc2(); +#pragma acc routine(S::MemFunc2) seq + void MemFunc3(); +#pragma acc routine(S::MemFunc3) seq + +#pragma acc routine seq + static void StaticMemFunc1(); + static void StaticMemFunc2(); + static void StaticMemFunc3(); +#pragma acc routine(StaticMemFunc3) seq + +#pragma acc routine seq + static constexpr auto StaticLambda1 = [](){}; + static constexpr auto StaticLambda2 = [](){}; +}; +#pragma acc routine(S::MemFunc2) seq +#pragma acc routine(S::StaticLambda2) seq +#pragma acc routine(S::StaticMemFunc2) seq + +void force_emit() { + S{}.MemFunc1(); + S{}.MemFunc2(); + S{}.MemFunc3(); + S::StaticMemFunc1(); + S::StaticMemFunc2(); + S::StaticMemFunc3(); + S::StaticLambda1(); + S::StaticLambda2(); +} + +// CHECK: cir.func{{.*}} @[[MEM1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = 
#acc.routine_info<[@[[MEM1_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[MEM2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[MEM2_R_NAME:.*]], @[[MEM2_R2_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[MEM3_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[MEM3_R_NAME:.*]]]>} +// +// CHECK: cir.func{{.*}} @[[STATICMEM1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[STATICMEM1_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[STATICMEM2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[STATICMEM2_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[STATICMEM3_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[STATICMEM3_R_NAME:.*]]]>} +// +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]]]>} +// +// CHECK: acc.routine @[[MEM1_R_NAME]] func(@[[MEM1_NAME]]) seq +// CHECK: acc.routine @[[STATICMEM1_R_NAME]] func(@[[STATICMEM1_NAME]]) seq +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[MEM2_R_NAME]] func(@[[MEM2_NAME]]) seq +// CHECK: acc.routine @[[MEM3_R_NAME]] func(@[[MEM3_NAME]]) seq +// CHECK: acc.routine @[[STATICMEM3_R_NAME]] func(@[[STATICMEM3_NAME]]) seq +// CHECK: acc.routine @[[MEM2_R2_NAME]] func(@[[MEM2_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq +// CHECK: acc.routine @[[STATICMEM2_R_NAME]] func(@[[STATICMEM2_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-ns.cpp b/clang/test/CIR/CodeGenOpenACC/routine-ns.cpp new file mode 100644 index 0000000000000..9d1d677e79db8 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-ns.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -fopenacc 
-Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +namespace NS1 { +#pragma acc routine seq + int NSFunc1(); +#pragma acc routine seq + auto Lambda1 = [](){}; + + auto Lambda2 = [](){}; +} // namespace NS1 + +#pragma acc routine(NS1::NSFunc1) seq +#pragma acc routine(NS1::Lambda2) seq + +void force_emit() { + NS1::NSFunc1(); + NS1::Lambda1(); + NS1::Lambda2(); +} + +// CHECK: cir.func{{.*}} @[[F1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F1_R_NAME:.*]], @[[F1_R2_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]]]>} +// +// CHECK: acc.routine @[[F1_R_NAME]] func(@[[F1_NAME]]) seq +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[F1_R2_NAME]] func(@[[F1_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-templ.cpp b/clang/test/CIR/CodeGenOpenACC/routine-templ.cpp new file mode 100644 index 0000000000000..419442220a1ba --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-templ.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +#pragma acc routine seq +template +void func(){} + +void use() { + func(); + func(); +} + +// CHECK: cir.func{{.*}} @[[T1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[T1_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[T2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[T2_R_NAME:.*]]]>} +// +// CHECK: acc.routine @[[T1_R_NAME]] func(@[[T1_NAME]]) seq +// CHECK: acc.routine @[[T2_R_NAME]] func(@[[T2_NAME]]) seq diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c index 613a390bc6d36..04834ece3a4a6 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c @@ -1,17 +1,26 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include -// CHECK-LABEL: @test_vmaxnmaq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: 
[[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3:[0-9]+]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) { @@ -22,12 +31,19 @@ float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmaq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// 
CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) { @@ -38,12 +54,19 @@ float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmaq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x 
half> [[TMP2]] // float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -54,12 +77,19 @@ float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmaq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -69,3 +99,5 @@ float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vmaxnmaq_m_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c index bad7cd903ab16..1225353a5a9d2 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c @@ -1,15 +1,22 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include -// CHECK-LABEL: @test_vmaxnmq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], 
<8 x half> [[B:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) { @@ -20,10 +27,15 @@ float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { @@ -34,12 +46,19 @@ float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> 
[[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -50,12 +69,19 @@ float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: 
[[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -66,12 +92,19 @@ float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// 
CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -82,12 +115,19 @@ float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_x_f32(float32x4_t a, 
float32x4_t b, mve_pred16_t p) { @@ -97,3 +137,5 @@ float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vmaxnmq_x_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c index 0182cf7c5b6b3..fc0dc5701e4d9 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c @@ -1,17 +1,26 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck 
%s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include -// CHECK-LABEL: @test_vminnmaq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3:[0-9]+]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnm.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) { @@ -22,12 +31,19 @@ float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmaq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: 
@test_vminnmaq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnm.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) { @@ -38,12 +54,19 @@ float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmaq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_m_f16( +// CHECK-STRICT-NEXT: entry: 
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -54,12 +77,19 @@ float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmaq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, 
mve_pred16_t p) { @@ -69,3 +99,5 @@ float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vminnmaq_m_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c index b48ff9d84b8f6..7dbad94c77674 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c @@ -1,15 +1,22 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s 
--check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include -// CHECK-LABEL: @test_vminnmq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vminnm.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) { @@ -20,10 +27,15 @@ float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vminnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { @@ -34,12 +46,19 @@ float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 
[[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -50,12 +69,19 @@ float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// 
CHECK-NOSTRICT-LABEL: @test_vminnmq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -66,12 +92,19 @@ float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], 
<8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -82,12 +115,19 @@ float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = 
call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -97,3 +137,5 @@ float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vminnmq_x_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/distributed-thin-lto/memprof-pgho.cpp b/clang/test/CodeGen/distributed-thin-lto/memprof-pgho.cpp index ed05962846aff..317efd1b3a138 100644 --- a/clang/test/CodeGen/distributed-thin-lto/memprof-pgho.cpp +++ b/clang/test/CodeGen/distributed-thin-lto/memprof-pgho.cpp @@ -1,6 +1,8 @@ // Test end-to-end ThinLTO optimization pipeline with PGHO, that it does not // interfere with other allocation instrumentation features. 
// +// REQUIRES: x86-registered-target +// // RUN: split-file %s %t // RUN: llvm-profdata merge %t/memprof.yaml -o %t/use.memprofdata // diff --git a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl index 80c0825895c86..57ea891b3eb29 100644 --- a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl +++ b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl @@ -162,6 +162,10 @@ #ifndef __opencl_c_program_scope_global_variables #error "Missing __opencl_c_program_scope_global_variables define" #endif + + #ifndef __opencl_c_read_write_images + #error "Missing __opencl_c_read_write_images define" + #endif #endif #if (__OPENCL_C_VERSION__ >= 300) diff --git a/clang/test/OpenMP/amdgcn_weak_alias.c b/clang/test/OpenMP/amdgcn_weak_alias.c index a9d5c1737b321..33c7dc0041810 100644 --- a/clang/test/OpenMP/amdgcn_weak_alias.c +++ b/clang/test/OpenMP/amdgcn_weak_alias.c @@ -94,10 +94,3 @@ int Three(void) __attribute__ ((weak, alias("__Three"))); int Three_(void) __attribute__ ((alias("__Three"))); extern int __attribute__((weak, alias("__Three_var"))) Three_var; extern int __attribute__((alias("__Three_var"))) Three_var_; -//. -// HOST: [[META0:![0-9]+]] = !{i32 1, !"__Two_var", i32 0, i32 0} -// HOST: [[META1:![0-9]+]] = !{i32 1, !"__Three_var", i32 0, i32 1} -//. -// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"__Two_var", i32 0, i32 0} -// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"__Three_var", i32 0, i32 1} -//. 
diff --git a/clang/test/SemaHLSL/static_resources.hlsl b/clang/test/SemaHLSL/static_resources.hlsl new file mode 100644 index 0000000000000..f71e9ea98e0d9 --- /dev/null +++ b/clang/test/SemaHLSL/static_resources.hlsl @@ -0,0 +1,138 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s + +// CHECK-DAG: [[ONE_STR:@.*]] = private unnamed_addr constant [4 x i8] c"One\00" +// CHECK-DAG: [[ARRAY_STR:@.*]] = private unnamed_addr constant [6 x i8] c"Array\00" +// CHECK-DAG: [[ONEWITHCOUNTER_STR:@.*]] = private unnamed_addr constant [15 x i8] c"OneWithCounter\00" +// CHECK-DAG: [[ARRAYWITHCOUNTER_STR:@.*]] = private unnamed_addr constant [17 x i8] c"ArrayWithCounter\00" +// CHECK-NOT: private unnamed_addr constant [{{[0-9]+}} x i8] c"Static + +RWBuffer One : register(u1, space5); +RWBuffer Array[2] : register(u10, space6); +RWStructuredBuffer OneWithCounter : register(u2, space4); +RWStructuredBuffer ArrayWithCounter[2] : register(u7, space4); + +// Check that the non-static resource One is initialized from binding on +// startup (register 1, space 5). +// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @hlsl::RWBuffer::__createFromBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} @One, i32 noundef 1, i32 noundef 5, i32 noundef 1, i32 noundef 0, ptr noundef [[ONE_STR]]) + +// Check that the non-static resource OneWithCounter is initialized from binding on +// startup (register 2, space 4). 
+// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @hlsl::RWStructuredBuffer::__createFromBindingWithImplicitCounter(unsigned int, unsigned int, int, unsigned int, char const*, unsigned int) +// CHECK-SAME: (ptr {{.*}} @OneWithCounter, i32 noundef 2, i32 noundef 4, i32 noundef 1, i32 noundef 0, ptr noundef [[ONEWITHCOUNTER_STR]], i32 noundef 0) + +// Note that non-static resource arrays are not initialized on startup. +// The individual resources from the array are initialized on access. + +static RWBuffer StaticOne; +static RWBuffer StaticArray[2]; + +// Check that StaticOne resource is initialized on startup with the default +// constructor and not from binding. It will initalize the handle to poison. +// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @hlsl::RWBuffer::RWBuffer()(ptr {{.*}} @StaticOne) + +// Check that StaticArray elements are initialized on startup with the default +// constructor and not from binding. The initializer will loop over the array +// elements and call the default constructor for each one, setting the handle to poison. 
+// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: br label %arrayctor.loop +// CHECK: arrayctor.loop: ; preds = %arrayctor.loop, %entry +// CHECK-NEXT: %arrayctor.cur = phi ptr [ @StaticArray, %entry ], [ %arrayctor.next, %arrayctor.loop ] +// CHECK-NEXT: call void @hlsl::RWBuffer::RWBuffer()(ptr {{.*}} %arrayctor.cur) +// CHECK-NEXT: %arrayctor.next = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %arrayctor.cur, i32 1 +// CHECK-NEXT: %arrayctor.done = icmp eq ptr %arrayctor.next, getelementptr inbounds (%"class.hlsl::RWBuffer", ptr @StaticArray, i32 2) +// CHECK-NEXT: br i1 %arrayctor.done, label %arrayctor.cont, label %arrayctor.loop +// CHECK: arrayctor.cont: ; preds = %arrayctor.loop +// CHECK-NEXT: ret void + +static RWStructuredBuffer StaticOneWithCounter; + +// Check that StaticOneWithCounter resource is initialized on startup with the default +// constructor and not from binding. It will initalize the handle to poison. +// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @hlsl::RWStructuredBuffer::RWStructuredBuffer()(ptr {{.*}} @StaticOneWithCounter) + +// No other global initialization routines should be present. +// CHECK-NOT: define internal void @__cxx_global_var_init{{.*}} + +[numthreads(4,1,1)] +void main() { +// CHECK: define internal void @main()() +// CHECK-NEXT: entry: +// CHECK-NEXT: %[[TMP0:.*]] = alloca %"class.hlsl::RWBuffer" + + static RWBuffer StaticLocal; +// Check that StaticLocal is initialized by default constructor (handle set to poison) +// and not from binding. +// call void @hlsl::RWBuffer::RWBuffer()(ptr {{.*}} @main()::StaticLocal) + + StaticLocal = Array[1]; +// A[2][0] is accessed here, so it should be initialized from binding (register 10, space 6, index 1), +// and then assigned to StaticLocal using = operator. 
+// CHECK: call void @hlsl::RWBuffer::__createFromBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} %[[TMP0]], i32 noundef 10, i32 noundef 6, i32 noundef 2, i32 noundef 1, ptr noundef [[ARRAY_STR]]) +// CHECK-NEXT: call {{.*}} ptr @hlsl::RWBuffer::operator=({{.*}})(ptr {{.*}} @main()::StaticLocal, ptr {{.*}} %[[TMP0]]) + + StaticOne = One; +// Operator = call to assign non-static One handle to static StaticOne. +// CHECK-NEXT: call {{.*}} ptr @hlsl::RWBuffer::operator=({{.*}})(ptr {{.*}} @StaticOne, ptr {{.*}} @One) + + StaticArray = Array; +// Check that each elements of StaticArray is initialized from binding (register 10, space 6, indices 0 and 1). +// CHECK: call void @hlsl::RWBuffer::__createFromBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWBuffer") align 4 @StaticArray, i32 noundef 10, i32 noundef 6, i32 noundef 2, i32 noundef 0, ptr noundef [[ARRAY_STR]]) +// CHECK-NEXT: call void @hlsl::RWBuffer::__createFromBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWBuffer") align 4 getelementptr ([2 x %"class.hlsl::RWBuffer"], ptr @StaticArray, i32 0, i32 1), +// CHECK-SAME: i32 noundef 10, i32 noundef 6, i32 noundef 2, i32 noundef 1, ptr noundef [[ARRAY_STR]] + + StaticArray[1] = One; +// Operator = call to assign non-static One handle to StaticArray element. 
+// CHECK-NEXT: call {{.*}} ptr @hlsl::RWBuffer::operator=(hlsl::RWBuffer const&) +// CHECK-SAME: (ptr {{.*}} getelementptr inbounds ([2 x %"class.hlsl::RWBuffer"], ptr @StaticArray, i32 0, i32 1), ptr {{.*}} @One) + + StaticLocal[0] = 123; +// CHECK-NEXT: %[[PTR0:.*]] = call {{.*}} ptr @hlsl::RWBuffer::operator[](unsigned int)(ptr {{.*}} @main()::StaticLocal, i32 noundef 0) +// CHECK-NEXT: store float 1.230000e+02, ptr %[[PTR0]] + + StaticOne[1] = 456; +// CHECK-NEXT: %[[PTR1:.*]] = call {{.*}} ptr @hlsl::RWBuffer::operator[](unsigned int)(ptr {{.*}}) @StaticOne, i32 noundef 1) +// CHECK-NEXT: store float 4.560000e+02, ptr %[[PTR1]], align 4 + + StaticArray[1][2] = 789; +// CHECK-NEXT: %[[PTR2:.*]] = call {{.*}} ptr @hlsl::RWBuffer::operator[](unsigned int) +// CHECK-SAME: (ptr {{.*}} getelementptr inbounds ([2 x %"class.hlsl::RWBuffer"], ptr @StaticArray, i32 0, i32 1), i32 noundef 2) +// CHECK-NEXT: store float 7.890000e+02, ptr %[[PTR2]], align 4 + + static RWStructuredBuffer StaticLocalWithCounter; +// Check that StaticLocalWithCounter is initialized by default constructor (handle set to poison) +// and not from binding. +// call void @hlsl::RWStructuredBuffer::RWStructuredBuffer()(ptr {{.*}} @main()::StaticLocalWithCounter) + + static RWStructuredBuffer StaticLocalArrayWithCounter[2]; + + StaticLocalWithCounter = OneWithCounter; +// Operator = call to assign non-static OneWithCounter handles to StaticLocalWithCounter handles. +// CHECK: call {{.*}} ptr @hlsl::RWStructuredBuffer::operator=(hlsl::RWStructuredBuffer const&)(ptr {{.*}} @main()::StaticLocalWithCounter, ptr {{.*}} @OneWithCounter) + + StaticLocalArrayWithCounter = ArrayWithCounter; +// Check that each elements of StaticLocalArrayWithCounter is initialized from binding +// of ArrayWithCounter (register 7, space 4, indices 0 and 1). 
+// CHECK: call void @hlsl::RWStructuredBuffer::__createFromBindingWithImplicitCounter(unsigned int, unsigned int, int, unsigned int, char const*, unsigned int) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWStructuredBuffer") align 4 @main()::StaticLocalArrayWithCounter, +// CHECK-SAME: i32 noundef 7, i32 noundef 4, i32 noundef 2, i32 noundef 0, ptr noundef [[ARRAYWITHCOUNTER_STR]], i32 noundef 1) + +// CHECK-NEXT: call void @hlsl::RWStructuredBuffer::__createFromBindingWithImplicitCounter(unsigned int, unsigned int, int, unsigned int, char const*, unsigned int) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWStructuredBuffer") align 4 getelementptr ([2 x %"class.hlsl::RWStructuredBuffer"], ptr @main()::StaticLocalArrayWithCounter, i32 0, i32 1), +// CHECK-SAME: i32 noundef 7, i32 noundef 4, i32 noundef 2, i32 noundef 1, ptr noundef [[ARRAYWITHCOUNTER_STR]], i32 noundef 1) +} + +// No other binding initialization calls should be present. +// CHECK-NOT: call void @hlsl::RWBuffer::__createFrom{{.*}}Binding{{.*}} diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 5f5bf42df5e6b..3efa28b0469c1 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -284,11 +284,9 @@ class ResourceDirectoryCache { if (CachedResourceDir != Cache.end()) return CachedResourceDir->second; - std::vector PrintResourceDirArgs{ClangBinaryName}; - if (ClangCLMode) - PrintResourceDirArgs.push_back("/clang:-print-resource-dir"); - else - PrintResourceDirArgs.push_back("-print-resource-dir"); + const std::array PrintResourceDirArgs{ + ClangBinaryName, + ClangCLMode ? 
"/clang:-print-resource-dir" : "-print-resource-dir"}; llvm::SmallString<64> OutputFile, ErrorFile; llvm::sys::fs::createTemporaryFile("print-resource-dir-output", diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h index 92d2210cbc640..8c848fcab24ee 100644 --- a/flang-rt/include/flang-rt/runtime/descriptor.h +++ b/flang-rt/include/flang-rt/runtime/descriptor.h @@ -512,9 +512,7 @@ class Descriptor { RT_API_ATTRS void Check() const; - // When dumpRawType, dumps stringified CFI_type_*, otherwise - // try to canonicalize and print as a Fortran type. - void Dump(FILE * = stdout, bool dumpRawType = true) const; + void Dump(FILE * = stdout) const; RT_API_ATTRS inline bool HasAddendum() const { return raw_.extra & _CFI_ADDENDUM_FLAG; diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp index 8c547107a47de..c95da0a5371e5 100644 --- a/flang-rt/lib/runtime/descriptor.cpp +++ b/flang-rt/lib/runtime/descriptor.cpp @@ -292,168 +292,14 @@ RT_API_ATTRS void Descriptor::Check() const { // TODO } -static const char *GetTypeStr(ISO::CFI_type_t type, bool dumpRawType) { - if (dumpRawType) { -#define CASE(x) \ - case (x): \ - return #x; - switch (type) { - CASE(CFI_type_signed_char) - CASE(CFI_type_short) - CASE(CFI_type_int) - CASE(CFI_type_long) - CASE(CFI_type_long_long) - CASE(CFI_type_size_t) - CASE(CFI_type_int8_t) - CASE(CFI_type_int16_t) - CASE(CFI_type_int32_t) - CASE(CFI_type_int64_t) - CASE(CFI_type_int128_t) - CASE(CFI_type_int_least8_t) - CASE(CFI_type_int_least16_t) - CASE(CFI_type_int_least32_t) - CASE(CFI_type_int_least64_t) - CASE(CFI_type_int_least128_t) - CASE(CFI_type_int_fast8_t) - CASE(CFI_type_int_fast16_t) - CASE(CFI_type_int_fast32_t) - CASE(CFI_type_int_fast64_t) - CASE(CFI_type_int_fast128_t) - CASE(CFI_type_intmax_t) - CASE(CFI_type_intptr_t) - CASE(CFI_type_ptrdiff_t) - CASE(CFI_type_half_float) - CASE(CFI_type_bfloat) - CASE(CFI_type_float) - CASE(CFI_type_double) - 
CASE(CFI_type_extended_double) - CASE(CFI_type_long_double) - CASE(CFI_type_float128) - CASE(CFI_type_half_float_Complex) - CASE(CFI_type_bfloat_Complex) - CASE(CFI_type_float_Complex) - CASE(CFI_type_double_Complex) - CASE(CFI_type_extended_double_Complex) - CASE(CFI_type_long_double_Complex) - CASE(CFI_type_float128_Complex) - CASE(CFI_type_Bool) - CASE(CFI_type_char) - CASE(CFI_type_cptr) - CASE(CFI_type_struct) - CASE(CFI_type_char16_t) - CASE(CFI_type_char32_t) - CASE(CFI_type_uint8_t) - CASE(CFI_type_uint16_t) - CASE(CFI_type_uint32_t) - CASE(CFI_type_uint64_t) - CASE(CFI_type_uint128_t) - } -#undef CASE - return nullptr; - } - TypeCode code{type}; - - if (!code.IsValid()) - return "invalid"; - - common::optional> categoryAndKind = - code.GetCategoryAndKind(); - if (!categoryAndKind) - return nullptr; - - TypeCategory tcat; - int kind; - std::tie(tcat, kind) = *categoryAndKind; - -#define CASE(cat, k) \ - case (k): \ - return #cat "(kind=" #k ")"; - switch (tcat) { - case TypeCategory::Integer: - switch (kind) { - CASE(INTEGER, 1) - CASE(INTEGER, 2) - CASE(INTEGER, 4) - CASE(INTEGER, 8) - CASE(INTEGER, 16) - } - break; - case TypeCategory::Unsigned: - switch (kind) { - CASE(UNSIGNED, 1) - CASE(UNSIGNED, 2) - CASE(UNSIGNED, 4) - CASE(UNSIGNED, 8) - CASE(UNSIGNED, 16) - } - break; - case TypeCategory::Real: - switch (kind) { - CASE(REAL, 2) - CASE(REAL, 3) - CASE(REAL, 4) - CASE(REAL, 8) - CASE(REAL, 10) - CASE(REAL, 16) - } - break; - case TypeCategory::Complex: - switch (kind) { - CASE(COMPLEX, 2) - CASE(COMPLEX, 3) - CASE(COMPLEX, 4) - CASE(COMPLEX, 8) - CASE(COMPLEX, 10) - CASE(COMPLEX, 16) - } - break; - case TypeCategory::Character: - switch (kind) { - CASE(CHARACTER, 1) - CASE(CHARACTER, 2) - CASE(CHARACTER, 4) - } - break; - case TypeCategory::Logical: - switch (kind) { - CASE(LOGICAL, 1) - CASE(LOGICAL, 2) - CASE(LOGICAL, 4) - CASE(LOGICAL, 8) - } - break; - case TypeCategory::Derived: - return "DERIVED"; - } -#undef CASE - return nullptr; -} - -void 
Descriptor::Dump(FILE *f, bool dumpRawType) const { +void Descriptor::Dump(FILE *f) const { std::fprintf(f, "Descriptor @ %p:\n", reinterpret_cast(this)); std::fprintf(f, " base_addr %p\n", raw_.base_addr); - std::fprintf(f, " elem_len %zd\n", ElementBytes()); + std::fprintf(f, " elem_len %zd\n", static_cast(raw_.elem_len)); std::fprintf(f, " version %d\n", static_cast(raw_.version)); - if (rank() > 0) { - std::fprintf(f, " rank %d\n", rank()); - } else { - std::fprintf(f, " scalar\n"); - } - int ty = static_cast(raw_.type); - if (const char *tyStr = GetTypeStr(raw_.type, dumpRawType)) { - std::fprintf(f, " type %d \"%s\"\n", ty, tyStr); - } else { - std::fprintf(f, " type %d\n", ty); - } - int attr = static_cast(raw_.attribute); - if (IsPointer()) { - std::fprintf(f, " attribute %d (pointer) \n", attr); - } else if (IsAllocatable()) { - std::fprintf(f, " attribute %d (allocatable)\n", attr); - } else { - std::fprintf(f, " attribute %d\n", attr); - } - + std::fprintf(f, " rank %d\n", static_cast(raw_.rank)); + std::fprintf(f, " type %d\n", static_cast(raw_.type)); + std::fprintf(f, " attribute %d\n", static_cast(raw_.attribute)); std::fprintf(f, " extra %d\n", static_cast(raw_.extra)); std::fprintf(f, " addendum %d\n", static_cast(HasAddendum())); std::fprintf(f, " alloc_idx %d\n", static_cast(GetAllocIdx())); diff --git a/flang-rt/lib/runtime/extensions.cpp b/flang-rt/lib/runtime/extensions.cpp index d39e429eb88b9..c110b0381890c 100644 --- a/flang-rt/lib/runtime/extensions.cpp +++ b/flang-rt/lib/runtime/extensions.cpp @@ -416,14 +416,6 @@ std::int64_t RTNAME(time)() { return time(nullptr); } // MCLOCK: returns accumulated CPU time in ticks std::int32_t FORTRAN_PROCEDURE_NAME(mclock)() { return std::clock(); } -void RTNAME(ShowDescriptor)(const Fortran::runtime::Descriptor *descr) { - if (descr) { - descr->Dump(stderr, /*dumpRawType=*/false); - } else { - std::fprintf(stderr, "NULL\n"); - } -} - static void _internal_srand(int seed) { rand_seed = seed ? 
seed : 123459876; } // IRAND(I) diff --git a/flang-rt/unittests/Runtime/Descriptor.cpp b/flang-rt/unittests/Runtime/Descriptor.cpp index f86ff4cf16a55..4a7bb43a492af 100644 --- a/flang-rt/unittests/Runtime/Descriptor.cpp +++ b/flang-rt/unittests/Runtime/Descriptor.cpp @@ -9,7 +9,6 @@ #include "flang-rt/runtime/descriptor.h" #include "tools.h" #include "gtest/gtest.h" -#include using namespace Fortran::runtime; @@ -159,115 +158,3 @@ TEST(Descriptor, FixedStride) { EXPECT_TRUE(descriptor.IsContiguous()); EXPECT_EQ(descriptor.FixedStride().value_or(-666), 0); } - -static std::string getAddrFilteredContent(FILE *fin) { - rewind(fin); - std::ostringstream content; - char buffer[1024]; - size_t bytes_read; - while ((bytes_read = fread(buffer, 1, sizeof(buffer), fin)) > 0) { - content.write(buffer, bytes_read); - } - return std::regex_replace( - content.str(), std::regex("(0x[0-9a-fA-F]*)"), "[address]"); -} - -TEST(Descriptor, Dump) { - StaticDescriptor<4> staticDesc[2]; - Descriptor &descriptor{staticDesc[0].descriptor()}; - using Type = std::int32_t; - Type data[8][8][8]; - constexpr int four{static_cast(sizeof data[0][0][0])}; - TypeCode integer{TypeCategory::Integer, four}; - // Scalar - descriptor.Establish(integer, four, data, 0); - FILE *tmpf = tmpfile(); - ASSERT_TRUE(tmpf) << "tmpfile returned NULL"; - auto resetTmpFile = [tmpf]() { - rewind(tmpf); - ftruncate(fileno(tmpf), 0); - }; - - descriptor.Dump(tmpf, /*dumpRawType=*/false); - // also dump as CFI type - descriptor.Dump(tmpf, /*dumpRawType=*/true); - std::string output = getAddrFilteredContent(tmpf); - ASSERT_STREQ(output.c_str(), - "Descriptor @ [address]:\n" - " base_addr [address]\n" - " elem_len 4\n" - " version 20240719\n" - " scalar\n" - " type 9 \"INTEGER(kind=4)\"\n" - " attribute 0\n" - " extra 0\n" - " addendum 0\n" - " alloc_idx 0\n" - "Descriptor @ [address]:\n" - " base_addr [address]\n" - " elem_len 4\n" - " version 20240719\n" - " scalar\n" - " type 9 \"CFI_type_int32_t\"\n" - " attribute 
0\n" - " extra 0\n" - " addendum 0\n" - " alloc_idx 0\n"); - - // Contiguous matrix (0:7, 0:7) - SubscriptValue extent[3]{8, 8, 8}; - descriptor.Establish(integer, four, data, 2, extent); - resetTmpFile(); - descriptor.Dump(tmpf, /*dumpRawType=*/false); - output = getAddrFilteredContent(tmpf); - ASSERT_STREQ(output.c_str(), - "Descriptor @ [address]:\n" - " base_addr [address]\n" - " elem_len 4\n" - " version 20240719\n" - " rank 2\n" - " type 9 \"INTEGER(kind=4)\"\n" - " attribute 0\n" - " extra 0\n" - " addendum 0\n" - " alloc_idx 0\n" - " dim[0] lower_bound 0\n" - " extent 8\n" - " sm 4\n" - " dim[1] lower_bound 0\n" - " extent 8\n" - " sm 32\n"); - - TypeCode real{TypeCategory::Real, four}; - // Discontiguous real 3-D array (0:7, 0:6:2, 0:6:2) - descriptor.Establish(real, four, data, 3, extent); - descriptor.GetDimension(1).SetExtent(4); - descriptor.GetDimension(1).SetByteStride(8 * 2 * four); - descriptor.GetDimension(2).SetExtent(4); - descriptor.GetDimension(2).SetByteStride(8 * 8 * 2 * four); - - resetTmpFile(); - descriptor.Dump(tmpf, /*dumpRawType=*/false); - output = getAddrFilteredContent(tmpf); - ASSERT_STREQ(output.c_str(), - "Descriptor @ [address]:\n" - " base_addr [address]\n" - " elem_len 4\n" - " version 20240719\n" - " rank 3\n" - " type 27 \"REAL(kind=4)\"\n" - " attribute 0\n" - " extra 0\n" - " addendum 0\n" - " alloc_idx 0\n" - " dim[0] lower_bound 0\n" - " extent 8\n" - " sm 4\n" - " dim[1] lower_bound 0\n" - " extent 4\n" - " sm 64\n" - " dim[2] lower_bound 0\n" - " extent 4\n" - " sm 512\n"); - fclose(tmpf); -} diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index ecc9143af69fd..31bead9f8bfdc 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -1414,48 +1414,6 @@ This is prefixed by `STRING`, a colon and a space. 
- **Class:** subroutine - **Syntax:** `CALL PERROR(STRING)` -### Non-Standard Intrinsics: SHOW_DESCRIPTOR - -#### Description -`SHOW_DESCRIPTOR(VAR)` prints (on the C stderr stream) a contents of a descriptor for the variable VAR, -which can be of any type and rank, including scalars. -Requires use of flang_debug module. - -Here is an example of its output: -``` -Descriptor @ 0x7ffe506fc368: - base_addr 0x55944caef0f0 - elem_len 4 - version 20240719 - rank 1 - type 9 "INTEGER(kind=4)" - attribute 2 (allocatable) - extra 0 - addendum 0 - alloc_idx 0 - dim[0] lower_bound 1 - extent 5 - sm 4 -``` - -#### Usage and Info -- **Standard:** flang extension -- **Class:** subroutine -- **Syntax:** `CALL show_descriptor(VAR)` - -#### Example -```Fortran -subroutine test - use flang_debug - implicit none - character(len=9) :: c = 'Hey buddy' - integer :: a(5) - call show_descriptor(c) - call show_descriptor(c(1:3)) - call show_descriptor(a) -end subroutine test -``` - ### Non-Standard Intrinsics: SRAND #### Description diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index b248106b51101..0ae9177f98fd8 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -413,7 +413,6 @@ struct IntrinsicLibrary { template mlir::Value genShift(mlir::Type resultType, llvm::ArrayRef); mlir::Value genShiftA(mlir::Type resultType, llvm::ArrayRef); - void genShowDescriptor(llvm::ArrayRef); mlir::Value genSign(mlir::Type, llvm::ArrayRef); mlir::Value genSind(mlir::Type, llvm::ArrayRef); mlir::Value genSinpi(mlir::Type, llvm::ArrayRef); diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h index 2f52ffce5eb43..30c3189366cec 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h @@ -111,9 +111,6 
@@ void genSleep(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value genChdir(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value name); -/// generate dump of a descriptor -void genShowDescriptor(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Value descriptor); mlir::Value genIrand(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value i); diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index 07bb47e26b968..3fda523acb382 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -350,15 +350,14 @@ def cuf_SharedMemoryOp let arguments = (ins TypeAttr:$in_type, OptionalAttr:$uniq_name, OptionalAttr:$bindc_name, Variadic:$typeparams, Variadic:$shape, - Optional:$offset // offset in bytes from the shared memory - // base address. - ); + // offset in bytes from the shared memory base address. + Optional:$offset, OptionalAttr:$alignment); let results = (outs fir_ReferenceType:$ptr); let assemblyFormat = [{ (`[` $offset^ `:` type($offset) `]`)? $in_type (`(` $typeparams^ `:` type($typeparams) `)`)? - (`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr)) + (`,` $shape^ `:` type($shape) )? (`align` $alignment^ )? 
attr-dict `->` qualified(type($ptr)) }]; let builders = [OpBuilder<(ins "mlir::Type":$inType, diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h index 3167c554abbdd..0f133623475f8 100644 --- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h +++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h @@ -43,6 +43,15 @@ struct OpenACCPointerLikeModel mlir::TypedValue destination, mlir::TypedValue source, mlir::Type varType) const; + + mlir::Value genLoad(mlir::Type pointer, mlir::OpBuilder &builder, + mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + + bool genStore(mlir::Type pointer, mlir::OpBuilder &builder, + mlir::Location loc, mlir::Value valueToStore, + mlir::TypedValue destPtr) const; }; template diff --git a/flang/include/flang/Runtime/extensions.h b/flang/include/flang/Runtime/extensions.h index f3ed15b86f1be..f2765a5987ea1 100644 --- a/flang/include/flang/Runtime/extensions.h +++ b/flang/include/flang/Runtime/extensions.h @@ -25,11 +25,6 @@ typedef std::uint32_t gid_t; #else #include "sys/types.h" //pid_t #endif -namespace Fortran { -namespace runtime { -class Descriptor; -} -} // namespace Fortran extern "C" { @@ -107,9 +102,6 @@ int FORTRAN_PROCEDURE_NAME(mclock)(); float FORTRAN_PROCEDURE_NAME(secnds)(float *refTime); float RTNAME(Secnds)(float *refTime, const char *sourceFile, int line); -// Extension subroutine SHOW_DESCRIPTOR(D) -void RTNAME(ShowDescriptor)(const Fortran::runtime::Descriptor *descr); - // GNU extension function IRAND(I) int RTNAME(Irand)(int *i); diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index d69400e0ec188..bbcb766274e7f 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -1713,8 +1713,6 @@ static const IntrinsicInterface intrinsicSubroutine[]{ {}, Rank::scalar, 
IntrinsicClass::impureSubroutine}, {"second", {{"time", DefaultReal, Rank::scalar}}, {}, Rank::scalar, IntrinsicClass::impureSubroutine}, - {"__builtin_show_descriptor", {{"d", AnyData, Rank::anyOrAssumedRank}}, {}, - Rank::elemental, IntrinsicClass::impureSubroutine}, {"system", {{"command", DefaultChar, Rank::scalar}, {"exitstat", DefaultInt, Rank::scalar, Optionality::optional, diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp index 270037f5fcb00..ae6120826f8d2 100644 --- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp @@ -17,6 +17,8 @@ #include "flang/Evaluate/common.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/MutableBox.h" +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "mlir/Dialect/Index/IR/IndexOps.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" @@ -1489,6 +1491,13 @@ void CUDAIntrinsicLibrary::genTMABulkG2S( builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {}); } +static void setAlignment(mlir::Value ptr, unsigned alignment) { + if (auto declareOp = mlir::dyn_cast(ptr.getDefiningOp())) + if (auto sharedOp = mlir::dyn_cast( + declareOp.getMemref().getDefiningOp())) + sharedOp.setAlignment(alignment); +} + static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value barrier, mlir::Value src, mlir::Value dst, mlir::Value nelem, @@ -1496,8 +1505,11 @@ static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value size = mlir::arith::MulIOp::create(builder, loc, nelem, eleSize); auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext()); barrier = builder.createConvert(loc, llvmPtrTy, barrier); - dst = builder.createConvert(loc, llvmPtrTy, dst); - src = builder.createConvert(loc, llvmPtrTy, src); + setAlignment(dst, 16); + dst = 
convertPtrToNVVMSpace(builder, loc, dst, + mlir::NVVM::NVVMMemorySpace::Shared); + src = convertPtrToNVVMSpace(builder, loc, src, + mlir::NVVM::NVVMMemorySpace::Shared); mlir::NVVM::InlinePtxOp::create( builder, loc, mlir::TypeRange{}, {dst, src, size, barrier}, {}, "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], " diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 75a74eeb18417..3619e5bb942db 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -727,10 +727,6 @@ static constexpr IntrinsicHandler handlers[]{ {"shifta", &I::genShiftA}, {"shiftl", &I::genShift}, {"shiftr", &I::genShift}, - {"show_descriptor", - &I::genShowDescriptor, - {{{"d", asBox}}}, - /*isElemental=*/false}, {"sign", &I::genSign}, {"signal", &I::genSignalSubroutine, @@ -7888,16 +7884,6 @@ mlir::Value IntrinsicLibrary::genShiftA(mlir::Type resultType, return result; } -void IntrinsicLibrary::genShowDescriptor( - llvm::ArrayRef args) { - assert(args.size() == 1 && "expected single argument for show_descriptor"); - const mlir::Value descriptor = fir::getBase(args[0]); - - assert(fir::isa_box_type(descriptor.getType()) && - "argument must have been lowered to box type"); - fir::runtime::genShowDescriptor(builder, loc, descriptor); -} - // SIGNAL void IntrinsicLibrary::genSignalSubroutine( llvm::ArrayRef args) { diff --git a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp index caeed0c0f6bfe..4d366135c305f 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp @@ -471,13 +471,6 @@ mlir::Value fir::runtime::genChdir(fir::FirOpBuilder &builder, return fir::CallOp::create(builder, loc, func, args).getResult(0); } -void fir::runtime::genShowDescriptor(fir::FirOpBuilder &builder, - mlir::Location loc, mlir::Value descAddr) { - 
mlir::func::FuncOp func{ - fir::runtime::getRuntimeFunc(loc, builder)}; - fir::CallOp::create(builder, loc, func, descAddr); -} - mlir::Value fir::runtime::genIrand(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value i) { auto runtimeFunc = fir::runtime::getRuntimeFunc(loc, builder); diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index 687007d957225..671e5f9455c22 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -333,7 +333,7 @@ void cuf::SharedMemoryOp::build( bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName); build(builder, result, wrapAllocaResultType(inType), mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape, - /*offset=*/mlir::Value{}); + /*offset=*/mlir::Value{}, /*alignment=*/mlir::IntegerAttr{}); result.addAttributes(attributes); } diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index ae0f5fb8197fa..9fcc7d3681c39 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -1014,4 +1014,114 @@ template bool OpenACCPointerLikeModel::genCopy( mlir::TypedValue source, mlir::Type varType) const; +template +mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const { + + // Unwrap to get the pointee type. + mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer); + assert(pointeeTy && "expected pointee type to be extractable"); + + // Box types contain both a descriptor and referenced data. The genLoad API + // handles simple loads and cannot properly manage both parts. + if (fir::isa_box_type(pointeeTy)) + return {}; + + // Unlimited polymorphic (class(*)) cannot be handled because type is unknown. 
+ if (fir::isUnlimitedPolymorphicType(pointeeTy)) + return {}; + + // Return empty for dynamic size types because the load logic + // cannot be determined simply from the type. + if (fir::hasDynamicSize(pointeeTy)) + return {}; + + mlir::Value loadedValue = fir::LoadOp::create(builder, loc, srcPtr); + + // If valueType is provided and differs from the loaded type, insert a convert + if (valueType && loadedValue.getType() != valueType) + return fir::ConvertOp::create(builder, loc, valueType, loadedValue); + + return loadedValue; +} + +template mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + +template mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + +template mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + +template mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + +template +bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const { + + // Unwrap to get the pointee type. + mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer); + assert(pointeeTy && "expected pointee type to be extractable"); + + // Box types contain both a descriptor and referenced data. The genStore API + // handles simple stores and cannot properly manage both parts. + if (fir::isa_box_type(pointeeTy)) + return false; + + // Unlimited polymorphic (class(*)) cannot be handled because type is unknown. 
+ if (fir::isUnlimitedPolymorphicType(pointeeTy)) + return false; + + // Return false for dynamic size types because the store logic + // cannot be determined simply from the type. + if (fir::hasDynamicSize(pointeeTy)) + return false; + + // Get the type from the value being stored + mlir::Type valueType = valueToStore.getType(); + mlir::Value convertedValue = valueToStore; + + // If the value type differs from the pointee type, insert a convert + if (valueType != pointeeTy) + convertedValue = + fir::ConvertOp::create(builder, loc, pointeeTy, valueToStore); + + fir::StoreOp::create(builder, loc, convertedValue, destPtr); + return true; +} + +template bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const; + +template bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const; + +template bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const; + +template bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const; + } // namespace fir::acc diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90 index a9b60508785db..4d134fa4b62b1 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang/module/__fortran_builtins.f90 @@ -28,9 +28,6 @@ intrinsic :: __builtin_c_f_pointer public :: __builtin_c_f_pointer - intrinsic :: __builtin_show_descriptor - public :: __builtin_show_descriptor - intrinsic :: sizeof ! 
extension public :: sizeof diff --git a/flang/module/flang_debug.f90 b/flang/module/flang_debug.f90 deleted file mode 100644 index baab3b2477f49..0000000000000 --- a/flang/module/flang_debug.f90 +++ /dev/null @@ -1,14 +0,0 @@ -!===-- module/flang_debug.f90 ----------------------------------------------===! -! -! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -! See https://llvm.org/LICENSE.txt for license information. -! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -! -!===------------------------------------------------------------------------===! - -module flang_debug - - use __fortran_builtins, only: & - show_descriptor => __builtin_show_descriptor - -end module flang_debug diff --git a/flang/test/Fir/OpenACC/pointer-like-interface-load.mlir b/flang/test/Fir/OpenACC/pointer-like-interface-load.mlir new file mode 100644 index 0000000000000..170ea56b24742 --- /dev/null +++ b/flang/test/Fir/OpenACC/pointer-like-interface-load.mlir @@ -0,0 +1,95 @@ +// RUN: fir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=load}))" 2>&1 | FileCheck %s + +func.func @test_load_scalar_f32() { + %ptr = fir.alloca f32 {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca f32 {test.ptr} + // CHECK: Loaded value type: f32 + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_load_scalar_i32() { + %ptr = fir.alloca i32 {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca i32 {test.ptr} + // CHECK: Loaded value type: i32 + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_load_scalar_i64() { + %ptr = fir.alloca i64 {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca i64 {test.ptr} + // CHECK: Loaded value type: i64 + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref + return +} 
+ +// ----- + +func.func @test_load_heap_scalar() { + %ptr = fir.allocmem f64 {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.allocmem f64 {test.ptr} + // CHECK: Loaded value type: f64 + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.heap + return +} + +// ----- + +func.func @test_load_logical() { + %ptr = fir.alloca !fir.logical<4> {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca !fir.logical<4> {test.ptr} + // CHECK: Loaded value type: !fir.logical<4> + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref> + return +} + +// ----- + +func.func @test_load_derived_type() { + %ptr = fir.alloca !fir.type<_QTt{i:i32}> {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca !fir.type<_QTt{i:i32}> {test.ptr} + // CHECK: Loaded value type: !fir.type<_QTt{i:i32}> + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref> + return +} + +// ----- + +func.func @test_load_constant_array() { + %ptr = fir.alloca !fir.array<10xf32> {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca !fir.array<10xf32> {test.ptr} + // CHECK: Loaded value type: !fir.array<10xf32> + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref> + return +} + +// ----- + +func.func @test_load_dynamic_array_fails() { + %c10 = arith.constant 10 : index + %ptr = fir.alloca !fir.array, %c10 {test.ptr} + // CHECK: Failed to generate load for operation: %{{.*}} = fir.alloca !fir.array + return +} + +// ----- + +func.func @test_load_box_fails() { + %ptr = fir.alloca !fir.box> {test.ptr} + // CHECK: Failed to generate load for operation: %{{.*}} = fir.alloca !fir.box> + return +} + +// ----- + +func.func @test_load_unlimited_polymorphic_fails() { + %ptr = fir.alloca !fir.class {test.ptr} + // CHECK: Failed to generate load for operation: %{{.*}} = fir.alloca !fir.class + return +} + diff --git a/flang/test/Fir/OpenACC/pointer-like-interface-store.mlir 
b/flang/test/Fir/OpenACC/pointer-like-interface-store.mlir new file mode 100644 index 0000000000000..5ea4f0e750c65 --- /dev/null +++ b/flang/test/Fir/OpenACC/pointer-like-interface-store.mlir @@ -0,0 +1,85 @@ +// RUN: fir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=store}))" 2>&1 | FileCheck %s + +func.func @test_store_scalar_f32() { + %ptr = fir.alloca f32 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca f32 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 4.200000e+01 : f32 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_store_scalar_i32() { + %ptr = fir.alloca i32 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca i32 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i32 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_store_scalar_i64() { + %ptr = fir.alloca i64 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca i64 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i64 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_store_heap_scalar() { + %ptr = fir.allocmem f64 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.allocmem f64 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 4.200000e+01 : f64 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.heap + return +} + +// ----- + +func.func @test_store_with_type_conversion() { + %ptr = fir.alloca i32 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca i32 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i32 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.ref + return +} + +// ----- + +func.func 
@test_store_constant_array() { + %val = fir.undefined !fir.array<10xf32> {test.value} + %ptr = fir.alloca !fir.array<10xf32> {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca !fir.array<10xf32> {test.ptr} + // CHECK: Generated: fir.store %{{.*}} to %{{.*}} : !fir.ref> + return +} + +// ----- + +func.func @test_store_dynamic_array_fails() { + %c10 = arith.constant 10 : index + %ptr = fir.alloca !fir.array, %c10 {test.ptr} + // CHECK: Failed to generate store for operation: %{{.*}} = fir.alloca !fir.array + return +} + +// ----- + +func.func @test_store_box_fails() { + %ptr = fir.alloca !fir.box> {test.ptr} + // CHECK: Failed to generate store for operation: %{{.*}} = fir.alloca !fir.box> + return +} + +// ----- + +func.func @test_store_unlimited_polymorphic_fails() { + %ptr = fir.alloca !fir.class {test.ptr} + // CHECK: Failed to generate store for operation: %{{.*}} = fir.alloca !fir.class + return +} + diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 434322ea22265..1e3c66307c334 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -538,11 +538,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c4 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_c4Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_c4Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xcomplex> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_c4Etmp"} -> !fir.ref>> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! 
CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_c8(a, n) @@ -557,11 +558,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c8 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_c8Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_c8Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xcomplex> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_c8Etmp"} -> !fir.ref>> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 16 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! 
CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_i4(a, n) @@ -576,11 +578,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i4 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_i4Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_i4Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xi32> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_i4Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_i8(a, n) @@ -595,11 +598,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i8 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_i8Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_i8Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xi64> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_i8Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_r2(a, n) @@ -614,11 +618,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r2 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r2Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r2Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xf16> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_r2Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 2 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! 
CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_r4(a, n) @@ -633,11 +638,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r4 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r4Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r4Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xf32> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_r4Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! 
CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_r8(a, n) @@ -652,11 +658,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r8 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r8Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r8Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xf64> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_r8Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_store_c4(c, n) diff --git a/flang/test/Lower/Intrinsics/show_descriptor.f90 b/flang/test/Lower/Intrinsics/show_descriptor.f90 deleted file mode 100644 index a0b8d3eb4348f..0000000000000 --- a/flang/test/Lower/Intrinsics/show_descriptor.f90 +++ /dev/null @@ -1,241 +0,0 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s - -module test_show_descriptor -use flang_debug -contains -subroutine test_int -! 
CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_int() { - implicit none - integer :: n - integer,allocatable :: a(:) - n = 5 - allocate(a(n)) -! CHECK: %[[C3:.*]] = arith.constant 3 : index -! CHECK: %[[C1:.*]] = arith.constant 1 : index -! CHECK: %[[C5:.*]] = arith.constant 5 : i32 -! CHECK: %[[C0:.*]] = arith.constant 0 : index -! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ALLOCA_0:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QMtest_show_descriptorFtest_intEa"} -! CHECK: %[[ZERO_BITS_0:.*]] = fir.zero_bits !fir.heap> -! CHECK: %[[SHAPE_0:.*]] = fir.shape %[[C0]] : (index) -> !fir.shape<1> -! CHECK: %[[EMBOX_0:.*]] = fir.embox %[[ZERO_BITS_0]](%[[SHAPE_0]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> -! CHECK: fir.store %[[EMBOX_0]] to %[[ALLOCA_0]] : !fir.ref>>> -! CHECK: %[[DECLARE_0:.*]] = fir.declare %[[ALLOCA_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_intEa"} : (!fir.ref>>>) -> !fir.ref>>> -! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QMtest_show_descriptorFtest_intEn"} -! CHECK: %[[DECLARE_1:.*]] = fir.declare %[[ALLOCA_1]] {uniq_name = "_QMtest_show_descriptorFtest_intEn"} : (!fir.ref) -> !fir.ref -! CHECK: fir.store %[[C5]] to %[[DECLARE_1]] : !fir.ref -! CHECK: %[[LOAD_0:.*]] = fir.load %[[DECLARE_1]] : !fir.ref -! CHECK: %[[CONVERT_0:.*]] = fir.convert %[[LOAD_0]] : (i32) -> index -! CHECK: %[[CMPI_0:.*]] = arith.cmpi sgt, %[[CONVERT_0]], %[[C0]] : index -! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[CONVERT_0]], %[[C0]] : index -! CHECK: %[[ALLOCMEM_0:.*]] = fir.allocmem !fir.array, %[[SELECT_0]] {fir.must_be_heap = true, uniq_name = "_QMtest_show_descriptorFtest_intEa.alloc"} - - call show_descriptor(a) -! CHECK: %[[SHAPE_1:.*]] = fir.shape %[[SELECT_0]] : (index) -> !fir.shape<1> -! CHECK: %[[EMBOX_1:.*]] = fir.embox %[[ALLOCMEM_0]](%[[SHAPE_1]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> -! 
CHECK: fir.store %[[EMBOX_1]] to %[[DECLARE_0]] : !fir.ref>>> -! CHECK: %[[LOAD_1:.*]] = fir.load %[[DECLARE_0]] : !fir.ref>>> -! CHECK: fir.call @_FortranAShowDescriptor(%[[LOAD_1]]) fastmath : (!fir.box>>) -> () - - call show_descriptor(a(1:3)) -! CHECK: %[[LOAD_2:.*]] = fir.load %[[DECLARE_0]] : !fir.ref>>> -! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C3]] : (index) -> !fir.shape<1> -! CHECK: %[[BOX_ADDR_0:.*]] = fir.box_addr %[[LOAD_2]] : (!fir.box>>) -> !fir.heap> -! CHECK: %[[CONSTANT_4:.*]] = arith.constant 0 : index -! CHECK: %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[LOAD_2]], %[[CONSTANT_4]] : (!fir.box>>, index) -> (index, index, index) -! CHECK: %[[SHAPE_SHIFT_0:.*]] = fir.shape_shift %[[BOX_DIMS_0]]#0, %[[BOX_DIMS_0]]#1 : (index, index) -> !fir.shapeshift<1> -! CHECK: %[[ARRAY_COOR_0:.*]] = fir.array_coor %[[BOX_ADDR_0]](%[[SHAPE_SHIFT_0]]) %[[C1]] : (!fir.heap>, !fir.shapeshift<1>, index) -> !fir.ref -! CHECK: %[[CONVERT_1:.*]] = fir.convert %[[ARRAY_COOR_0]] : (!fir.ref) -> !fir.ref> -! CHECK: %[[EMBOX_2:.*]] = fir.embox %[[CONVERT_1]](%[[SHAPE_2]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_2]]) fastmath : (!fir.box>) -> () - deallocate(a) -end subroutine test_int - -subroutine test_char -! CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_char() { - implicit none - character(len=9) :: c = 'Hey buddy' - call show_descriptor(c) -! CHECK: %[[C3:.*]] = arith.constant 3 : index -! CHECK: %[[C1:.*]] = arith.constant 1 : index -! CHECK: %[[C9:.*]] = arith.constant 9 : index -! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_0:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_charEc) : !fir.ref> -! CHECK: %[[DECLARE_0:.*]] = fir.declare %[[ADDRESS_OF_0]] typeparams %[[C9]] {uniq_name = "_QMtest_show_descriptorFtest_charEc"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[EMBOX_0:.*]] = fir.embox %[[DECLARE_0]] : (!fir.ref>) -> !fir.box> -! 
CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_0]]) fastmath : (!fir.box>) -> () - - call show_descriptor(c(1:3)) -! CHECK: %[[C1_0:.*]] = arith.constant 1 : index -! CHECK: %[[SUBI_0:.*]] = arith.subi %[[C1]], %[[C1_0]] : index -! CHECK: %[[CONVERT_0:.*]] = fir.convert %[[DECLARE_0]] : (!fir.ref>) -> !fir.ref>> -! CHECK: %[[COORDINATE_OF_0:.*]] = fir.coordinate_of %[[CONVERT_0]], %[[SUBI_0]] : (!fir.ref>>, index) -> !fir.ref> -! CHECK: %[[CONVERT_1:.*]] = fir.convert %[[COORDINATE_OF_0]] : (!fir.ref>) -> !fir.ref> -! CHECK: %[[EMBOX_1:.*]] = fir.embox %[[CONVERT_1]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_1]]) fastmath : (!fir.box>) -> () -! CHECK: return -end subroutine test_char - -subroutine test_logical -! CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_logical() { - implicit none - logical(kind=1) :: l1 = .false. - logical(kind=2) :: l2 = .true. - logical(kind=2), dimension(2), target :: la2 = (/ .true., .false. /) - logical(kind=2), dimension(:), pointer :: pla2 -! CHECK: %[[C0:.*]] = arith.constant 0 : index -! CHECK: %[[C2:.*]] = arith.constant 2 : index -! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_0:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_logicalEl1) : !fir.ref> -! CHECK: %[[DECLARE_0:.*]] = fir.declare %[[ADDRESS_OF_0]] {uniq_name = "_QMtest_show_descriptorFtest_logicalEl1"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_1:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_logicalEl2) : !fir.ref> -! CHECK: %[[DECLARE_1:.*]] = fir.declare %[[ADDRESS_OF_1]] {uniq_name = "_QMtest_show_descriptorFtest_logicalEl2"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_2:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_logicalEla2) : !fir.ref>> -! CHECK: %[[SHAPE_0:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> -! 
CHECK: %[[DECLARE_2:.*]] = fir.declare %[[ADDRESS_OF_2]](%[[SHAPE_0]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_logicalEla2"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> -! CHECK: %[[ALLOCA_0:.*]] = fir.alloca !fir.box>>> {bindc_name = "pla2", uniq_name = "_QMtest_show_descriptorFtest_logicalEpla2"} -! CHECK: %[[ZERO_BITS_0:.*]] = fir.zero_bits !fir.ptr>> -! CHECK: %[[SHAPE_1:.*]] = fir.shape %[[C0]] : (index) -> !fir.shape<1> -! CHECK: %[[EMBOX_0:.*]] = fir.embox %[[ZERO_BITS_0]](%[[SHAPE_1]]) : (!fir.ptr>>, !fir.shape<1>) -> !fir.box>>> -! CHECK: fir.store %[[EMBOX_0]] to %[[ALLOCA_0]] : !fir.ref>>>> - - call show_descriptor(l1) - call show_descriptor(l2) - pla2 => la2 -! CHECK: %[[DECLARE_3:.*]] = fir.declare %[[ALLOCA_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_logicalEpla2"} : (!fir.ref>>>>) -> !fir.ref>>>> -! CHECK: %[[EMBOX_1:.*]] = fir.embox %[[DECLARE_0]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_1]]) fastmath : (!fir.box>) -> () -! CHECK: %[[EMBOX_2:.*]] = fir.embox %[[DECLARE_1]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_2]]) fastmath : (!fir.box>) -> () - - call show_descriptor(la2) - call show_descriptor(pla2) -! CHECK: %[[CONVERT_0:.*]] = fir.convert %[[DECLARE_2]] : (!fir.ref>>) -> !fir.ref>> -! CHECK: %[[EMBOX_3:.*]] = fir.embox %[[CONVERT_0]](%[[SHAPE_0]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>>> -! CHECK: fir.store %[[EMBOX_3]] to %[[DECLARE_3]] : !fir.ref>>>> -! CHECK: %[[EMBOX_4:.*]] = fir.embox %[[DECLARE_2]](%[[SHAPE_0]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_4]]) fastmath : (!fir.box>>) -> () -! CHECK: %[[LOAD_0:.*]] = fir.load %[[DECLARE_3]] : !fir.ref>>>> -! CHECK: fir.call @_FortranAShowDescriptor(%[[LOAD_0]]) fastmath : (!fir.box>>>) -> () -! CHECK: return -end subroutine test_logical - -subroutine test_real -! 
CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_real() { - implicit none - real :: half = 0.5 - real :: row(3) = (/ 1 , 2, 3 /) - real(kind=8) :: w(4) = (/ .00011_8 , .00012_8, .00013_8, .00014_8 /) -! CHECK: %[[C2:.*]] = arith.constant 2 : index -! CHECK: %[[C1:.*]] = arith.constant 1 : index -! CHECK: %[[C4:.*]] = arith.constant 4 : index -! CHECK: %[[C3:.*]] = arith.constant 3 : index -! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_4:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_realEhalf) : !fir.ref -! CHECK: %[[DECLARE_5:.*]] = fir.declare %[[ADDRESS_OF_4]] {uniq_name = "_QMtest_show_descriptorFtest_realEhalf"} : (!fir.ref) -> !fir.ref -! CHECK: %[[ADDRESS_OF_5:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_realErow) : !fir.ref> -! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C3]] : (index) -> !fir.shape<1> -! CHECK: %[[DECLARE_6:.*]] = fir.declare %[[ADDRESS_OF_5]](%[[SHAPE_2]]) {uniq_name = "_QMtest_show_descriptorFtest_realErow"} : (!fir.ref>, !fir.shape<1>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_6:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_realEw) : !fir.ref> -! CHECK: %[[SHAPE_3:.*]] = fir.shape %[[C4]] : (index) -> !fir.shape<1> -! CHECK: %[[DECLARE_7:.*]] = fir.declare %[[ADDRESS_OF_6]](%[[SHAPE_3]]) {uniq_name = "_QMtest_show_descriptorFtest_realEw"} : (!fir.ref>, !fir.shape<1>) -> !fir.ref> - - call show_descriptor(half) - call show_descriptor(row) - call show_descriptor(w) - call show_descriptor(w(1:4:2)) -! CHECK: %[[EMBOX_7:.*]] = fir.embox %[[DECLARE_5]] : (!fir.ref) -> !fir.box -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_7]]) fastmath : (!fir.box) -> () -! CHECK: %[[EMBOX_8:.*]] = fir.embox %[[DECLARE_6]](%[[SHAPE_2]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_8]]) fastmath : (!fir.box>) -> () -! CHECK: %[[EMBOX_9:.*]] = fir.embox %[[DECLARE_7]](%[[SHAPE_3]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -! 
CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_9]]) fastmath : (!fir.box>) -> () -! CHECK: %[[SHAPE_4:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> -! CHECK: %[[UNDEFINED_0:.*]] = fir.undefined index -! CHECK: %[[SLICE_0:.*]] = fir.slice %[[C1]], %[[C4]], %[[C2]] : (index, index, index) -> !fir.slice<1> -! CHECK: %[[EMBOX_10:.*]] = fir.embox %[[DECLARE_7]](%[[SHAPE_3]]) {{\[}}%[[SLICE_0]]] : (!fir.ref>, !fir.shape<1>, !fir.slice<1>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_10]]) fastmath : (!fir.box>) -> () -! CHECK: return -end subroutine test_real - -subroutine test_complex -! CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_complex() { - implicit none - complex, parameter :: hr = 0.5 - complex, parameter :: hi = (0, 0.5) - complex :: c1 = hr - complex :: c2 = hi - complex :: a2(2) = (/ hr, hi /) -! CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32 -! CHECK: %[[CST_1:.*]] = arith.constant 5.000000e-01 : f32 -! CHECK: %[[C2:.*]] = arith.constant 2 : index -! CHECK: %[[ALLOCA_1:.*]] = fir.alloca complex -! CHECK: %[[ALLOCA_2:.*]] = fir.alloca complex -! CHECK: %[[DUMMY_SCOPE_3:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_7:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEa2) : !fir.ref>> -! CHECK: %[[SHAPE_5:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> -! CHECK: %[[DECLARE_8:.*]] = fir.declare %[[ADDRESS_OF_7]](%[[SHAPE_5]]) {uniq_name = "_QMtest_show_descriptorFtest_complexEa2"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> -! CHECK: %[[ADDRESS_OF_8:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEc1) : !fir.ref> -! CHECK: %[[DECLARE_9:.*]] = fir.declare %[[ADDRESS_OF_8]] {uniq_name = "_QMtest_show_descriptorFtest_complexEc1"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_9:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEc2) : !fir.ref> -! 
CHECK: %[[DECLARE_10:.*]] = fir.declare %[[ADDRESS_OF_9]] {uniq_name = "_QMtest_show_descriptorFtest_complexEc2"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_10:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEChi) : !fir.ref> -! CHECK: %[[DECLARE_11:.*]] = fir.declare %[[ADDRESS_OF_10]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_complexEChi"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_11:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEChr) : !fir.ref> -! CHECK: %[[DECLARE_12:.*]] = fir.declare %[[ADDRESS_OF_11]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_complexEChr"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[UNDEFINED_1:.*]] = fir.undefined complex -! CHECK: %[[INSERT_VALUE_0:.*]] = fir.insert_value %[[UNDEFINED_1]], %[[CST_1]], [0 : index] : (complex, f32) -> complex -! CHECK: %[[INSERT_VALUE_1:.*]] = fir.insert_value %[[INSERT_VALUE_0]], %[[CST_0]], [1 : index] : (complex, f32) -> complex -! CHECK: fir.store %[[INSERT_VALUE_1]] to %[[ALLOCA_2]] : !fir.ref> - - call show_descriptor(hr) -! CHECK: %[[EMBOX_11:.*]] = fir.embox %[[ALLOCA_2]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_11]]) fastmath : (!fir.box>) -> () - - call show_descriptor(hi) -! CHECK: %[[INSERT_VALUE_2:.*]] = fir.insert_value %[[UNDEFINED_1]], %[[CST_0]], [0 : index] : (complex, f32) -> complex -! CHECK: %[[INSERT_VALUE_3:.*]] = fir.insert_value %[[INSERT_VALUE_2]], %[[CST_1]], [1 : index] : (complex, f32) -> complex -! CHECK: fir.store %[[INSERT_VALUE_3]] to %[[ALLOCA_1]] : !fir.ref> -! CHECK: %[[EMBOX_12:.*]] = fir.embox %[[ALLOCA_1]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_12]]) fastmath : (!fir.box>) -> () - - call show_descriptor(a2) -! CHECK: %[[EMBOX_13:.*]] = fir.embox %[[DECLARE_8]](%[[SHAPE_5]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> -! 
CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_13]]) fastmath : (!fir.box>>) -> () -! CHECK: return -end subroutine test_complex - -subroutine test_derived -! CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_derived() { - implicit none - type :: t1 - integer :: a - integer :: b - end type t1 - type, extends (t1) :: t2 - integer :: c - end type t2 - type(t2) :: vt2 = t2(7,5,3) -! CHECK: %[[C0:.*]] = arith.constant 0 : index -! CHECK: %[[C2:.*]] = arith.constant 2 : index -! CHECK: %[[C1:.*]] = arith.constant 1 : index -! CHECK: %[[DUMMY_SCOPE_4:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_12:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.a) : !fir.ref> -! CHECK: %[[DECLARE_13:.*]] = fir.declare %[[ADDRESS_OF_12]] typeparams %[[C1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.a"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_13:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.b) : !fir.ref> -! CHECK: %[[DECLARE_14:.*]] = fir.declare %[[ADDRESS_OF_13]] typeparams %[[C1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.b"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_14:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.t1) : !fir.ref> -! CHECK: %[[DECLARE_15:.*]] = fir.declare %[[ADDRESS_OF_14]] typeparams %[[C2]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.t1"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_15:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.c) : !fir.ref> -! CHECK: %[[DECLARE_16:.*]] = fir.declare %[[ADDRESS_OF_15]] typeparams %[[C1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.c"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_16:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.t2) : !fir.ref> -! 
CHECK: %[[DECLARE_17:.*]] = fir.declare %[[ADDRESS_OF_16]] typeparams %[[C2]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.t2"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_17:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedEvt2) : !fir.ref,c:i32}>> -! CHECK: %[[DECLARE_18:.*]] = fir.declare %[[ADDRESS_OF_17]] {uniq_name = "_QMtest_show_descriptorFtest_derivedEvt2"} : (!fir.ref,c:i32}>>) -> !fir.ref,c:i32}>> - - call show_descriptor(vt2) -! CHECK: %[[EMBOX_16:.*]] = fir.embox %[[DECLARE_18]] : (!fir.ref,c:i32}>>) -> !fir.box,c:i32}>> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_16]]) fastmath : (!fir.box,c:i32}>>) -> () -! CHECK: return -end subroutine test_derived -end module test_show_descriptor diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 3baffbd0d310d..a2b4d73f48384 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -31,7 +31,6 @@ set(MODULES_WITHOUT_IMPLEMENTATION "iso_fortran_env" "f90deviceio" "iso_fortran_env_impl" - "flang_debug" ) set(MODULES ${MODULES_WITH_IMPLEMENTATION} ${MODULES_WITHOUT_IMPLEMENTATION}) diff --git a/libclc/opencl/lib/generic/atomic/atomic_def.inc b/libclc/opencl/lib/generic/atomic/atomic_def.inc index a4ccab5990888..e6b7c831e10d3 100644 --- a/libclc/opencl/lib/generic/atomic/atomic_def.inc +++ b/libclc/opencl/lib/generic/atomic/atomic_def.inc @@ -12,7 +12,8 @@ defined(cl_khr_int64_extended_atomics)) #define __CLC_HAVE_64_ATOMIC #endif -#if defined(__CLC_FPSIZE) && (__CLC_FPSIZE < 64 || defined(__CLC_HAVE_64_ATOMIC) +#if defined(__CLC_FPSIZE) && \ + (__CLC_FPSIZE < 64 || defined(__CLC_HAVE_64_ATOMIC)) #define __CLC_HAVE_FP_ATOMIC #endif #if defined(__CLC_GENSIZE) && \ diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index 01ca7498f0cc1..09c981131ff96 100644 --- a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -570,10 +570,7 @@ hash_map<_Key, _Tp, _Hash, 
_Pred, _Alloc>::hash_map( } template -hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(const hash_map& __u) : __table_(__u.__table_) { - __table_.__rehash_unique(__u.bucket_count()); - insert(__u.begin(), __u.end()); -} +hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(const hash_map& __u) : __table_(__u.__table_) {} template typename hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::__node_holder diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set index 2796774fee24a..56aa4d8a47eeb 100644 --- a/libcxx/include/ext/hash_set +++ b/libcxx/include/ext/hash_set @@ -356,10 +356,7 @@ hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set( } template -hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set(const hash_set& __u) : __table_(__u.__table_) { - __table_.__rehash_unique(__u.bucket_count()); - insert(__u.begin(), __u.end()); -} +hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set(const hash_set& __u) : __table_(__u.__table_) {} template template diff --git a/libcxx/test/extensions/gnu/hash_map/copy.pass.cpp b/libcxx/test/extensions/gnu/hash_map/copy.pass.cpp new file mode 100644 index 0000000000000..65b8debda0676 --- /dev/null +++ b/libcxx/test/extensions/gnu/hash_map/copy.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated + +// hash_map::hash_map(const hash_map&) + +#include +#include + +int main(int, char**) { + __gnu_cxx::hash_map map; + + map.insert(std::make_pair(1, 1)); + map.insert(std::make_pair(2, 1)); + + auto map2 = map; + + assert(map2.size() == 2); + + return 0; +} diff --git a/libcxx/test/extensions/gnu/hash_set/copy.pass.cpp b/libcxx/test/extensions/gnu/hash_set/copy.pass.cpp new file mode 100644 index 0000000000000..95a3579194923 --- /dev/null +++ b/libcxx/test/extensions/gnu/hash_set/copy.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated + +// hash_set::hash_set(const hash_set&) + +#include +#include + +int main(int, char**) { + __gnu_cxx::hash_set set; + + set.insert(1); + set.insert(2); + + auto set2 = set; + + assert(set2.size() == 2); + + return 0; +} diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index fb9e5df648c75..eb299099a9b71 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -154,7 +154,7 @@ def bp_startup_sort: JJ<"bp-startup-sort=">, MetaVarName<"[none,function]">, // Auxiliary options related to balanced partition defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions", - "When --irpgo-profile is pecified, prioritize function similarity for compression in addition to startup time", "">; + "When --irpgo-profile is specified, prioritize function similarity for compression in addition to startup time", "">; def 
verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">, HelpText<"Print information on balanced partitioning">; diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index ef501fbafc947..0ba152166522b 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -425,6 +425,18 @@ void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBBreakpoint(PyObject * return sb_ptr; } +void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBThread(PyObject * data) { + lldb::SBThread *sb_ptr = nullptr; + + int valid_cast = + SWIG_ConvertPtr(data, (void **)&sb_ptr, SWIGTYPE_p_lldb__SBThread, 0); + + if (valid_cast == -1) + return NULL; + + return sb_ptr; +} + void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBFrame(PyObject * data) { lldb::SBFrame *sb_ptr = nullptr; diff --git a/lldb/examples/python/templates/scripted_frame_provider.py b/lldb/examples/python/templates/scripted_frame_provider.py index 20f4d76d188c2..7a72f1a24c9da 100644 --- a/lldb/examples/python/templates/scripted_frame_provider.py +++ b/lldb/examples/python/templates/scripted_frame_provider.py @@ -31,7 +31,54 @@ class ScriptedFrameProvider(metaclass=ABCMeta): ) """ + @staticmethod + def applies_to_thread(thread): + """Determine if this frame provider should be used for a given thread. + + This static method is called before creating an instance of the frame + provider to determine if it should be applied to a specific thread. + Override this method to provide custom filtering logic. + + Args: + thread (lldb.SBThread): The thread to check. + + Returns: + bool: True if this frame provider should be used for the thread, + False otherwise. The default implementation returns True for + all threads. + + Example: + + .. 
code-block:: python + + @staticmethod + def applies_to_thread(thread): + # Only apply to thread 1 + return thread.GetIndexID() == 1 + """ + return True + + @staticmethod @abstractmethod + def get_description(): + """Get a description of this frame provider. + + This method should return a human-readable string describing what + this frame provider does. The description is used for debugging + and display purposes. + + Returns: + str: A description of the frame provider. + + Example: + + .. code-block:: python + + def get_description(self): + return "Crash log frame provider for thread 1" + """ + pass + def __init__(self, input_frames, args): """Construct a scripted frame provider. diff --git a/lldb/examples/python/templates/scripted_process.py b/lldb/examples/python/templates/scripted_process.py index b4232f632a30a..24aa9818bb989 100644 --- a/lldb/examples/python/templates/scripted_process.py +++ b/lldb/examples/python/templates/scripted_process.py @@ -243,6 +243,7 @@ def __init__(self, process, args): key/value pairs used by the scripted thread. 
""" self.target = None + self.arch = None self.originating_process = None self.process = None self.args = None @@ -264,6 +265,9 @@ def __init__(self, process, args): and process.IsValid() ): self.target = process.target + triple = self.target.triple + if triple: + self.arch = triple.split("-")[0] self.originating_process = process self.process = self.target.GetProcess() self.get_register_info() @@ -350,17 +354,14 @@ def get_stackframes(self): def get_register_info(self): if self.register_info is None: self.register_info = dict() - if "x86_64" in self.originating_process.arch: + if "x86_64" in self.arch: self.register_info["sets"] = ["General Purpose Registers"] self.register_info["registers"] = INTEL64_GPR - elif ( - "arm64" in self.originating_process.arch - or self.originating_process.arch == "aarch64" - ): + elif "arm64" in self.arch or self.arch == "aarch64": self.register_info["sets"] = ["General Purpose Registers"] self.register_info["registers"] = ARM64_GPR else: - raise ValueError("Unknown architecture", self.originating_process.arch) + raise ValueError("Unknown architecture", self.arch) return self.register_info @abstractmethod @@ -403,11 +404,12 @@ def __init__(self, thread, args): """Construct a scripted frame. Args: - thread (ScriptedThread): The thread owning this frame. + thread (ScriptedThread/lldb.SBThread): The thread owning this frame. args (lldb.SBStructuredData): A Dictionary holding arbitrary key/value pairs used by the scripted frame. 
""" self.target = None + self.arch = None self.originating_thread = None self.thread = None self.args = None @@ -417,15 +419,17 @@ def __init__(self, thread, args): self.register_ctx = {} self.variables = [] - if ( - isinstance(thread, ScriptedThread) - or isinstance(thread, lldb.SBThread) - and thread.IsValid() + if isinstance(thread, ScriptedThread) or ( + isinstance(thread, lldb.SBThread) and thread.IsValid() ): - self.target = thread.target self.process = thread.process + self.target = self.process.target + triple = self.target.triple + if triple: + self.arch = triple.split("-")[0] + tid = thread.tid if isinstance(thread, ScriptedThread) else thread.id self.originating_thread = thread - self.thread = self.process.GetThreadByIndexID(thread.tid) + self.thread = self.process.GetThreadByIndexID(tid) self.get_register_info() @abstractmethod @@ -506,7 +510,18 @@ def get_variables(self, filters): def get_register_info(self): if self.register_info is None: - self.register_info = self.originating_thread.get_register_info() + if isinstance(self.originating_thread, ScriptedThread): + self.register_info = self.originating_thread.get_register_info() + elif isinstance(self.originating_thread, lldb.SBThread): + self.register_info = dict() + if "x86_64" in self.arch: + self.register_info["sets"] = ["General Purpose Registers"] + self.register_info["registers"] = INTEL64_GPR + elif "arm64" in self.arch or self.arch == "aarch64": + self.register_info["sets"] = ["General Purpose Registers"] + self.register_info["registers"] = ARM64_GPR + else: + raise ValueError("Unknown architecture", self.arch) return self.register_info @abstractmethod @@ -640,12 +655,12 @@ def get_stop_reason(self): # TODO: Passthrough stop reason from driving process if self.driving_thread.GetStopReason() != lldb.eStopReasonNone: - if "arm64" in self.originating_process.arch: + if "arm64" in self.arch: stop_reason["type"] = lldb.eStopReasonException stop_reason["data"]["desc"] = ( 
self.driving_thread.GetStopDescription(100) ) - elif self.originating_process.arch == "x86_64": + elif self.arch == "x86_64": stop_reason["type"] = lldb.eStopReasonSignal stop_reason["data"]["signal"] = signal.SIGTRAP else: diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index ce81ae46a0905..0318492f1054c 100644 --- a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -19,6 +19,7 @@ #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBStatisticsOptions.h" #include "lldb/API/SBSymbolContextList.h" +#include "lldb/API/SBThreadCollection.h" #include "lldb/API/SBType.h" #include "lldb/API/SBValue.h" #include "lldb/API/SBWatchpoint.h" @@ -1003,6 +1004,35 @@ class LLDB_API SBTarget { lldb::SBMutex GetAPIMutex() const; + /// Register a scripted frame provider for this target. + /// If a scripted frame provider with the same name and same argument + /// dictionary is already registered on this target, it will be overwritten. + /// + /// \param[in] class_name + /// The name of the Python class that implements the frame provider. + /// + /// \param[in] args_dict + /// A dictionary of arguments to pass to the frame provider class. + /// + /// \param[out] error + /// An error object indicating success or failure. + /// + /// \return + /// A unique identifier for the frame provider descriptor that was + /// registered. 0 if the registration failed. + uint32_t RegisterScriptedFrameProvider(const char *class_name, + lldb::SBStructuredData args_dict, + lldb::SBError &error); + + /// Remove a scripted frame provider from this target by name. + /// + /// \param[in] provider_id + /// The id of the frame provider class to remove. + /// + /// \return + /// An error object indicating success or failure. 
+ lldb::SBError RemoveScriptedFrameProvider(uint32_t provider_id); + protected: friend class SBAddress; friend class SBAddressRange; diff --git a/lldb/include/lldb/API/SBThread.h b/lldb/include/lldb/API/SBThread.h index f6a6d19935b83..639e7a0a1a5c0 100644 --- a/lldb/include/lldb/API/SBThread.h +++ b/lldb/include/lldb/API/SBThread.h @@ -256,6 +256,7 @@ class LLDB_API SBThread { friend class SBThreadPlan; friend class SBTrace; + friend class lldb_private::ScriptInterpreter; friend class lldb_private::python::SWIGBridge; SBThread(const lldb::ThreadSP &lldb_object_sp); diff --git a/lldb/include/lldb/API/SBThreadCollection.h b/lldb/include/lldb/API/SBThreadCollection.h index 5a052e6246026..d13dea0f11cd2 100644 --- a/lldb/include/lldb/API/SBThreadCollection.h +++ b/lldb/include/lldb/API/SBThreadCollection.h @@ -46,6 +46,7 @@ class LLDB_API SBThreadCollection { void SetOpaque(const lldb::ThreadCollectionSP &threads); private: + friend class SBTarget; friend class SBProcess; friend class SBThread; friend class SBSaveCoreOptions; diff --git a/lldb/include/lldb/Core/FormatEntity.h b/lldb/include/lldb/Core/FormatEntity.h index 40916dc48a70b..107c30a000979 100644 --- a/lldb/include/lldb/Core/FormatEntity.h +++ b/lldb/include/lldb/Core/FormatEntity.h @@ -81,6 +81,7 @@ struct Entry { FrameRegisterByName, FrameIsArtificial, FrameKind, + FrameBorrowedInfo, ScriptFrame, FunctionID, FunctionDidChange, diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h index 2d9f713676f90..49b60131399d5 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h @@ -16,11 +16,29 @@ namespace lldb_private { class ScriptedFrameProviderInterface : public ScriptedInterface { public: + virtual bool AppliesToThread(llvm::StringRef class_name, + lldb::ThreadSP thread_sp) { + return true; + } 
+ virtual llvm::Expected CreatePluginObject(llvm::StringRef class_name, lldb::StackFrameListSP input_frames, StructuredData::DictionarySP args_sp) = 0; + /// Get a description string for the frame provider. + /// + /// This is called by the descriptor to fetch a description from the + /// scripted implementation. Implementations should call a static method + /// on the scripting class to retrieve the description. + /// + /// \param class_name The name of the scripting class implementing the + /// provider. + /// + /// \return A string describing what this frame provider does, or an + /// empty string if no description is available. + virtual std::string GetDescription(llvm::StringRef class_name) { return {}; } + virtual StructuredData::ObjectSP GetFrameAtIndex(uint32_t index) { return {}; } diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index 7fed4940b85bf..0b91d6756552d 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -21,6 +21,7 @@ #include "lldb/API/SBMemoryRegionInfo.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBSymbolContext.h" +#include "lldb/API/SBThread.h" #include "lldb/Breakpoint/BreakpointOptions.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Core/SearchFilter.h" @@ -580,6 +581,8 @@ class ScriptInterpreter : public PluginInterface { lldb::StreamSP GetOpaqueTypeFromSBStream(const lldb::SBStream &stream) const; + lldb::ThreadSP GetOpaqueTypeFromSBThread(const lldb::SBThread &exe_ctx) const; + lldb::StackFrameSP GetOpaqueTypeFromSBFrame(const lldb::SBFrame &frame) const; SymbolContext diff --git a/lldb/include/lldb/Target/BorrowedStackFrame.h b/lldb/include/lldb/Target/BorrowedStackFrame.h new file mode 100644 index 0000000000000..72e7777961da7 --- /dev/null +++ b/lldb/include/lldb/Target/BorrowedStackFrame.h @@ -0,0 +1,146 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_TARGET_BORROWEDSTACKFRAME_H +#define LLDB_TARGET_BORROWEDSTACKFRAME_H + +#include "lldb/Target/StackFrame.h" + +namespace lldb_private { + +/// \class BorrowedStackFrame BorrowedStackFrame.h +/// "lldb/Target/BorrowedStackFrame.h" +/// +/// A wrapper around an existing StackFrame that supersedes its frame indices. +/// +/// This class is useful when you need to present an existing stack frame +/// with a different index, such as when creating synthetic frame views or +/// renumbering frames without copying all the underlying data. +/// +/// All methods delegate to the borrowed frame except for GetFrameIndex() +/// & GetConcreteFrameIndex() which uses the overridden indices. +class BorrowedStackFrame : public StackFrame { +public: + /// Construct a BorrowedStackFrame that wraps an existing frame. + /// + /// \param [in] borrowed_frame_sp + /// The existing StackFrame to borrow from. This frame's data will be + /// used for all operations except frame index queries. + /// + /// \param [in] new_frame_index + /// The frame index to report instead of the borrowed frame's index. + /// + /// \param [in] new_concrete_frame_index + /// Optional concrete frame index. If not provided, defaults to + /// new_frame_index. + BorrowedStackFrame( + lldb::StackFrameSP borrowed_frame_sp, uint32_t new_frame_index, + std::optional new_concrete_frame_index = std::nullopt); + + ~BorrowedStackFrame() override = default; + + uint32_t GetFrameIndex() const override; + void SetFrameIndex(uint32_t index); + + /// Get the concrete frame index for this borrowed frame. 
+ /// + /// Returns the overridden concrete frame index provided at construction, + /// or LLDB_INVALID_FRAME_ID if the borrowed frame represents an inlined + /// function, since this would require some computation if we chain inlined + /// borrowed stack frames. + /// + /// \return + /// The concrete frame index, or LLDB_INVALID_FRAME_ID for inline frames. + uint32_t GetConcreteFrameIndex() override; + + StackID &GetStackID() override; + + const Address &GetFrameCodeAddress() override; + + Address GetFrameCodeAddressForSymbolication() override; + + bool ChangePC(lldb::addr_t pc) override; + + const SymbolContext & + GetSymbolContext(lldb::SymbolContextItem resolve_scope) override; + + llvm::Error GetFrameBaseValue(Scalar &value) override; + + DWARFExpressionList *GetFrameBaseExpression(Status *error_ptr) override; + + Block *GetFrameBlock() override; + + lldb::RegisterContextSP GetRegisterContext() override; + + VariableList *GetVariableList(bool get_file_globals, + Status *error_ptr) override; + + lldb::VariableListSP + GetInScopeVariableList(bool get_file_globals, + bool must_have_valid_location = false) override; + + lldb::ValueObjectSP GetValueForVariableExpressionPath( + llvm::StringRef var_expr, lldb::DynamicValueType use_dynamic, + uint32_t options, lldb::VariableSP &var_sp, Status &error) override; + + bool HasDebugInformation() override; + + const char *Disassemble() override; + + lldb::ValueObjectSP + GetValueObjectForFrameVariable(const lldb::VariableSP &variable_sp, + lldb::DynamicValueType use_dynamic) override; + + bool IsInlined() override; + + bool IsSynthetic() const override; + + bool IsHistorical() const override; + + bool IsArtificial() const override; + + bool IsHidden() override; + + const char *GetFunctionName() override; + + const char *GetDisplayFunctionName() override; + + lldb::ValueObjectSP FindVariable(ConstString name) override; + + SourceLanguage GetLanguage() override; + + SourceLanguage GuessLanguage() override; + + 
lldb::ValueObjectSP GuessValueForAddress(lldb::addr_t addr) override; + + lldb::ValueObjectSP GuessValueForRegisterAndOffset(ConstString reg, + int64_t offset) override; + + StructuredData::ObjectSP GetLanguageSpecificData() override; + + lldb::RecognizedStackFrameSP GetRecognizedFrame() override; + + /// Get the underlying borrowed frame. + lldb::StackFrameSP GetBorrowedFrame() const; + + bool isA(const void *ClassID) const override; + static bool classof(const StackFrame *obj); + +private: + lldb::StackFrameSP m_borrowed_frame_sp; + uint32_t m_new_frame_index; + uint32_t m_new_concrete_frame_index; + static char ID; + + BorrowedStackFrame(const BorrowedStackFrame &) = delete; + const BorrowedStackFrame &operator=(const BorrowedStackFrame &) = delete; +}; + +} // namespace lldb_private + +#endif // LLDB_TARGET_BORROWEDSTACKFRAME_H diff --git a/lldb/include/lldb/Target/StackFrame.h b/lldb/include/lldb/Target/StackFrame.h index 135bd81e4e8d4..46922448d6e59 100644 --- a/lldb/include/lldb/Target/StackFrame.h +++ b/lldb/include/lldb/Target/StackFrame.h @@ -43,6 +43,13 @@ namespace lldb_private { class StackFrame : public ExecutionContextScope, public std::enable_shared_from_this { public: + /// LLVM RTTI support. + /// \{ + static char ID; + virtual bool isA(const void *ClassID) const { return ClassID == &ID; } + static bool classof(const StackFrame *obj) { return obj->isA(&ID); } + /// \} + enum ExpressionPathOption { eExpressionPathOptionCheckPtrVsMember = (1u << 0), eExpressionPathOptionsNoFragileObjcIvar = (1u << 1), @@ -127,7 +134,7 @@ class StackFrame : public ExecutionContextScope, lldb::ThreadSP GetThread() const { return m_thread_wp.lock(); } - StackID &GetStackID(); + virtual StackID &GetStackID(); /// Get an Address for the current pc value in this StackFrame. /// @@ -135,7 +142,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The Address object set to the current PC value. 
- const Address &GetFrameCodeAddress(); + virtual const Address &GetFrameCodeAddress(); /// Get the current code Address suitable for symbolication, /// may not be the same as GetFrameCodeAddress(). @@ -153,7 +160,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The Address object set to the current PC value. - Address GetFrameCodeAddressForSymbolication(); + virtual Address GetFrameCodeAddressForSymbolication(); /// Change the pc value for a given thread. /// @@ -165,7 +172,7 @@ class StackFrame : public ExecutionContextScope, /// \return /// true if the pc was changed. false if this failed -- possibly /// because this frame is not a live StackFrame. - bool ChangePC(lldb::addr_t pc); + virtual bool ChangePC(lldb::addr_t pc); /// Provide a SymbolContext for this StackFrame's current pc value. /// @@ -181,7 +188,8 @@ class StackFrame : public ExecutionContextScope, /// \return /// A SymbolContext reference which includes the types of information /// requested by resolve_scope, if they are available. - const SymbolContext &GetSymbolContext(lldb::SymbolContextItem resolve_scope); + virtual const SymbolContext & + GetSymbolContext(lldb::SymbolContextItem resolve_scope); /// Return the Canonical Frame Address (DWARF term) for this frame. /// @@ -199,7 +207,7 @@ class StackFrame : public ExecutionContextScope, /// \return /// If there is an error determining the CFA address, return an error /// explaining the failure. Success otherwise. - llvm::Error GetFrameBaseValue(Scalar &value); + virtual llvm::Error GetFrameBaseValue(Scalar &value); /// Get the DWARFExpressionList corresponding to the Canonical Frame Address. /// @@ -211,7 +219,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// Returns the corresponding DWARF expression, or NULL. 
- DWARFExpressionList *GetFrameBaseExpression(Status *error_ptr); + virtual DWARFExpressionList *GetFrameBaseExpression(Status *error_ptr); /// Get the current lexical scope block for this StackFrame, if possible. /// @@ -221,7 +229,7 @@ class StackFrame : public ExecutionContextScope, /// \return /// A pointer to the current Block. nullptr is returned if this can /// not be provided. - Block *GetFrameBlock(); + virtual Block *GetFrameBlock(); /// Get the RegisterContext for this frame, if possible. /// @@ -235,7 +243,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The RegisterContext shared point for this frame. - lldb::RegisterContextSP GetRegisterContext(); + virtual lldb::RegisterContextSP GetRegisterContext(); const lldb::RegisterContextSP &GetRegisterContextSP() const { return m_reg_context_sp; @@ -261,7 +269,8 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// A pointer to a list of variables. - VariableList *GetVariableList(bool get_file_globals, Status *error_ptr); + virtual VariableList *GetVariableList(bool get_file_globals, + Status *error_ptr); /// Retrieve the list of variables that are in scope at this StackFrame's /// pc. @@ -280,7 +289,7 @@ class StackFrame : public ExecutionContextScope, /// StackFrame's pc. /// \return /// A pointer to a list of variables. - lldb::VariableListSP + virtual lldb::VariableListSP GetInScopeVariableList(bool get_file_globals, bool must_have_valid_location = false); @@ -309,7 +318,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// A shared pointer to the ValueObject described by var_expr. 
- lldb::ValueObjectSP GetValueForVariableExpressionPath( + virtual lldb::ValueObjectSP GetValueForVariableExpressionPath( llvm::StringRef var_expr, lldb::DynamicValueType use_dynamic, uint32_t options, lldb::VariableSP &var_sp, Status &error); @@ -318,14 +327,14 @@ class StackFrame : public ExecutionContextScope, /// \return /// true if debug information is available for this frame (function, /// compilation unit, block, etc.) - bool HasDebugInformation(); + virtual bool HasDebugInformation(); /// Return the disassembly for the instructions of this StackFrame's /// function as a single C string. /// /// \return /// C string with the assembly instructions for this function. - const char *Disassemble(); + virtual const char *Disassemble(); /// Print a description of this frame using the provided frame format. /// @@ -337,9 +346,9 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// \b true if and only if dumping with the given \p format worked. - bool DumpUsingFormat(Stream &strm, - const lldb_private::FormatEntity::Entry *format, - llvm::StringRef frame_marker = {}); + virtual bool DumpUsingFormat(Stream &strm, + const lldb_private::FormatEntity::Entry *format, + llvm::StringRef frame_marker = {}); /// Print a description for this frame using the frame-format formatter /// settings. If the current frame-format settings are invalid, then the @@ -353,8 +362,8 @@ class StackFrame : public ExecutionContextScope, /// /// \param [in] frame_marker /// Optional string that will be prepended to the frame output description. - void DumpUsingSettingsFormat(Stream *strm, bool show_unique = false, - const char *frame_marker = nullptr); + virtual void DumpUsingSettingsFormat(Stream *strm, bool show_unique = false, + const char *frame_marker = nullptr); /// Print a description for this frame using a default format. 
/// @@ -366,7 +375,7 @@ class StackFrame : public ExecutionContextScope, /// /// \param [in] show_fullpaths /// Whether to print the full source paths or just the file base name. - void Dump(Stream *strm, bool show_frame_index, bool show_fullpaths); + virtual void Dump(Stream *strm, bool show_frame_index, bool show_fullpaths); /// Print a description of this stack frame and/or the source /// context/assembly for this stack frame. @@ -389,8 +398,9 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// Returns true if successful. - bool GetStatus(Stream &strm, bool show_frame_info, bool show_source, - bool show_unique = false, const char *frame_marker = nullptr); + virtual bool GetStatus(Stream &strm, bool show_frame_info, bool show_source, + bool show_unique = false, + const char *frame_marker = nullptr); /// Query whether this frame is a concrete frame on the call stack, or if it /// is an inlined frame derived from the debug information and presented by @@ -401,10 +411,10 @@ class StackFrame : public ExecutionContextScope, virtual bool IsInlined(); /// Query whether this frame is synthetic. - bool IsSynthetic() const; + virtual bool IsSynthetic() const; /// Query whether this frame is part of a historical backtrace. - bool IsHistorical() const; + virtual bool IsHistorical() const; /// Query whether this frame is artificial (e.g a synthesized result of /// inferring missing tail call frames from a backtrace). Artificial frames @@ -419,7 +429,7 @@ class StackFrame : public ExecutionContextScope, /// Language plugins can use this API to report language-specific /// runtime information about this compile unit, such as additional /// language version details or feature flags. - StructuredData::ObjectSP GetLanguageSpecificData(); + virtual StructuredData::ObjectSP GetLanguageSpecificData(); /// Get the frame's demangled name. 
/// @@ -439,9 +449,9 @@ class StackFrame : public ExecutionContextScope, /// \return /// StackFrame index 0 indicates the currently-executing function. Inline /// frames are included in this frame index count. - uint32_t GetFrameIndex() const; + virtual uint32_t GetFrameIndex() const; - /// Set this frame's synthetic frame index. + /// Set this frame's frame index. void SetFrameIndex(uint32_t index) { m_frame_index = index; } /// Query this frame to find what frame it is in this Thread's @@ -452,7 +462,7 @@ class StackFrame : public ExecutionContextScope, /// frames are not included in this frame index count; their concrete /// frame index will be the same as the concrete frame that they are /// derived from. - uint32_t GetConcreteFrameIndex() const { return m_concrete_frame_index; } + virtual uint32_t GetConcreteFrameIndex() { return m_concrete_frame_index; } /// Create a ValueObject for a given Variable in this StackFrame. /// @@ -466,7 +476,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// A ValueObject for this variable. - lldb::ValueObjectSP + virtual lldb::ValueObjectSP GetValueObjectForFrameVariable(const lldb::VariableSP &variable_sp, lldb::DynamicValueType use_dynamic); @@ -474,11 +484,11 @@ class StackFrame : public ExecutionContextScope, /// parsing expressions given the execution context. /// /// \return The language of the frame if known. - SourceLanguage GetLanguage(); + virtual SourceLanguage GetLanguage(); /// Similar to GetLanguage(), but is allowed to take a potentially incorrect /// guess if exact information is not available. - SourceLanguage GuessLanguage(); + virtual SourceLanguage GuessLanguage(); /// Attempt to reconstruct the ValueObject for a given raw address touched by /// the current instruction. The ExpressionPath should indicate how to get /// to this value using "frame variable." /// /// \param [in] addr /// The raw address. /// /// \return /// The ValueObject if found. If valid, it has a valid ExpressionPath.
- lldb::ValueObjectSP GuessValueForAddress(lldb::addr_t addr); + virtual lldb::ValueObjectSP GuessValueForAddress(lldb::addr_t addr); /// Attempt to reconstruct the ValueObject for the address contained in a /// given register plus an offset. The ExpressionPath should indicate how @@ -503,8 +513,8 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The ValueObject if found. If valid, it has a valid ExpressionPath. - lldb::ValueObjectSP GuessValueForRegisterAndOffset(ConstString reg, - int64_t offset); + virtual lldb::ValueObjectSP GuessValueForRegisterAndOffset(ConstString reg, + int64_t offset); /// Attempt to reconstruct the ValueObject for a variable with a given \a name /// from within the current StackFrame, within the current block. The search @@ -517,7 +527,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The ValueObject if found. - lldb::ValueObjectSP FindVariable(ConstString name); + virtual lldb::ValueObjectSP FindVariable(ConstString name); // lldb::ExecutionContextScope pure virtual functions lldb::TargetSP CalculateTarget() override; @@ -530,7 +540,7 @@ class StackFrame : public ExecutionContextScope, void CalculateExecutionContext(ExecutionContext &exe_ctx) override; - lldb::RecognizedStackFrameSP GetRecognizedFrame(); + virtual lldb::RecognizedStackFrameSP GetRecognizedFrame(); /// Get the StackFrameList that contains this frame. 
/// @@ -546,7 +556,9 @@ class StackFrame : public ExecutionContextScope, } protected: + friend class BorrowedStackFrame; friend class StackFrameList; + friend class SyntheticStackFrameList; void SetSymbolContextScope(SymbolContextScope *symbol_scope); diff --git a/lldb/include/lldb/Target/StackFrameList.h b/lldb/include/lldb/Target/StackFrameList.h index 8c14e92a41a4e..539c070ff0f4b 100644 --- a/lldb/include/lldb/Target/StackFrameList.h +++ b/lldb/include/lldb/Target/StackFrameList.h @@ -26,7 +26,7 @@ class StackFrameList : public std::enable_shared_from_this { StackFrameList(Thread &thread, const lldb::StackFrameListSP &prev_frames_sp, bool show_inline_frames); - ~StackFrameList(); + virtual ~StackFrameList(); /// Get the number of visible frames. Frames may be created if \p can_create /// is true. Synthetic (inline) frames expanded from the concrete frame #0 @@ -106,6 +106,7 @@ class StackFrameList : public std::enable_shared_from_this { protected: friend class Thread; + friend class ScriptedFrameProvider; friend class ScriptedThread; /// Use this API to build a stack frame list (used for scripted threads, for @@ -211,19 +212,23 @@ class StackFrameList : public std::enable_shared_from_this { /// Whether or not to show synthetic (inline) frames. Immutable. const bool m_show_inlined_frames; + /// Returns true if fetching frames was interrupted, false otherwise. + virtual bool FetchFramesUpTo(uint32_t end_idx, + InterruptionControl allow_interrupt); + private: uint32_t SetSelectedFrameNoLock(lldb_private::StackFrame *frame); lldb::StackFrameSP GetFrameAtIndexNoLock(uint32_t idx, std::shared_lock &guard); + /// @{ /// These two Fetch frames APIs and SynthesizeTailCallFrames are called in /// GetFramesUpTo, they are the ones that actually add frames. They must be /// called with the writer end of the list mutex held. - - /// Returns true if fetching frames was interrupted, false otherwise. 
- bool FetchFramesUpTo(uint32_t end_idx, InterruptionControl allow_interrupt); + /// /// Not currently interruptible so returns void. + /// @} void FetchOnlyConcreteFramesUpTo(uint32_t end_idx); void SynthesizeTailCallFrames(StackFrame &next_frame); @@ -231,6 +236,27 @@ const StackFrameList &operator=(const StackFrameList &) = delete; }; +/// A StackFrameList that wraps another StackFrameList and uses a +/// SyntheticFrameProvider to lazily provide frames from either the provider +/// or the underlying real stack frame list. +class SyntheticStackFrameList : public StackFrameList {
+public: + SyntheticStackFrameList(Thread &thread, lldb::StackFrameListSP input_frames, + const lldb::StackFrameListSP &prev_frames_sp, + bool show_inline_frames); +
+protected: + /// Override FetchFramesUpTo to lazily return frames from the provider + /// or from the actual stack frame list. + bool FetchFramesUpTo(uint32_t end_idx, + InterruptionControl allow_interrupt) override; +
+private: + /// The input stack frame list that the provider transforms. + /// This could be a real StackFrameList or another SyntheticStackFrameList. + lldb::StackFrameListSP m_input_frames; +};
+
} // namespace lldb_private

#endif // LLDB_TARGET_STACKFRAMELIST_H diff --git a/lldb/include/lldb/Target/SyntheticFrameProvider.h b/lldb/include/lldb/Target/SyntheticFrameProvider.h index 61a492f356ece..2d5330cb03105 100644 --- a/lldb/include/lldb/Target/SyntheticFrameProvider.h +++ b/lldb/include/lldb/Target/SyntheticFrameProvider.h @@ -24,22 +24,25 @@ namespace lldb_private { /// This struct contains the metadata needed to instantiate a frame provider /// and optional filters to control which threads it applies to. -struct SyntheticFrameProviderDescriptor { +struct ScriptedFrameProviderDescriptor { /// Metadata for instantiating the provider (e.g. script class name and args).
lldb::ScriptedMetadataSP scripted_metadata_sp; + /// Interface for calling static methods on the provider class. + lldb::ScriptedFrameProviderInterfaceSP interface_sp; + /// Optional list of thread specifications to which this provider applies. /// If empty, the provider applies to all threads. A thread matches if it /// satisfies ANY of the specs in this vector (OR logic). std::vector thread_specs; - SyntheticFrameProviderDescriptor() = default; + ScriptedFrameProviderDescriptor() = default; - SyntheticFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp) + ScriptedFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp) : scripted_metadata_sp(metadata_sp) {} - SyntheticFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp, - const std::vector &specs) + ScriptedFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp, + const std::vector &specs) : scripted_metadata_sp(metadata_sp), thread_specs(specs) {} /// Get the name of this descriptor (the scripted class name). @@ -47,6 +50,12 @@ struct SyntheticFrameProviderDescriptor { return scripted_metadata_sp ? scripted_metadata_sp->GetClassName() : ""; } + /// Get the description of this frame provider. + /// + /// \return A string describing what this frame provider does, or an + /// empty string if no description is available. + std::string GetDescription() const; + /// Check if this descriptor applies to the given thread. bool AppliesToThread(Thread &thread) const { // If no thread specs specified, applies to all threads. @@ -64,6 +73,13 @@ struct SyntheticFrameProviderDescriptor { /// Check if this descriptor has valid metadata for script-based providers. bool IsValid() const { return scripted_metadata_sp != nullptr; } + /// Get a unique identifier for this descriptor based on its contents. + /// The ID is computed from the class name and arguments dictionary, + /// not from the pointer address, so two descriptors with the same + /// contents will have the same ID. 
+ uint32_t GetID() const; + + /// Dump a description of this descriptor to the given stream. void Dump(Stream *s) const; }; @@ -95,7 +111,7 @@ class SyntheticFrameProvider : public PluginInterface { /// otherwise an \a llvm::Error. static llvm::Expected CreateInstance(lldb::StackFrameListSP input_frames, - const SyntheticFrameProviderDescriptor &descriptor); + const ScriptedFrameProviderDescriptor &descriptor); /// Try to create a SyntheticFrameProvider instance for the given input /// frames using a specific C++ plugin. @@ -125,6 +141,8 @@ class SyntheticFrameProvider : public PluginInterface { ~SyntheticFrameProvider() override; + virtual std::string GetDescription() const = 0; + /// Get a single stack frame at the specified index. /// /// This method is called lazily - frames are only created when requested. diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index c0fcda7c0d960..812a638910b3b 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -32,6 +32,7 @@ #include "lldb/Target/PathMappingList.h" #include "lldb/Target/SectionLoadHistory.h" #include "lldb/Target/Statistics.h" +#include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Target/ThreadSpec.h" #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/Broadcaster.h" @@ -745,6 +746,36 @@ class Target : public std::enable_shared_from_this, Status Attach(ProcessAttachInfo &attach_info, Stream *stream); // Optional stream to receive first stop info + /// Add or update a scripted frame provider descriptor for this target. + /// All new threads in this target will check if they match any descriptors + /// to create their frame providers. + /// + /// \param[in] descriptor + /// The descriptor to add or update. + /// + /// \return + /// The descriptor identifier if the registration succeeded, otherwise an + /// llvm::Error. 
+ llvm::Expected AddScriptedFrameProviderDescriptor( + const ScriptedFrameProviderDescriptor &descriptor); + + /// Remove a scripted frame provider descriptor by id. + /// + /// \param[in] id + /// The id of the descriptor to remove. + /// + /// \return + /// True if a descriptor was removed, false if no descriptor with that + /// id existed. + bool RemoveScriptedFrameProviderDescriptor(uint32_t id); + + /// Clear all scripted frame provider descriptors for this target. + void ClearScriptedFrameProviderDescriptors(); + + /// Get all scripted frame provider descriptors for this target. + const llvm::DenseMap & + GetScriptedFrameProviderDescriptors() const; + // This part handles the breakpoints. BreakpointList &GetBreakpointList(bool internal = false); @@ -1744,6 +1775,13 @@ class Target : public std::enable_shared_from_this, PathMappingList m_image_search_paths; TypeSystemMap m_scratch_type_system_map; + /// Map of scripted frame provider descriptors for this target. + /// Keys are the provider descriptors ids, values are the descriptors. + /// Used to initialize frame providers for new threads. 
+ llvm::DenseMap + m_frame_provider_descriptors; + mutable std::recursive_mutex m_frame_provider_descriptors_mutex; + typedef std::map REPLMap; REPLMap m_repl_map; diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h index 841f80cd1b1eb..46ce192556756 100644 --- a/lldb/include/lldb/Target/Thread.h +++ b/lldb/include/lldb/Target/Thread.h @@ -1297,6 +1297,15 @@ class Thread : public std::enable_shared_from_this, lldb::StackFrameListSP GetStackFrameList(); + llvm::Error + LoadScriptedFrameProvider(const ScriptedFrameProviderDescriptor &descriptor); + + void ClearScriptedFrameProvider(); + + lldb::SyntheticFrameProviderSP GetFrameProvider() const { + return m_frame_provider_sp; + } + protected: friend class ThreadPlan; friend class ThreadList; @@ -1400,6 +1409,9 @@ class Thread : public std::enable_shared_from_this, /// The Thread backed by this thread, if any. lldb::ThreadWP m_backed_thread; + /// The Scripted Frame Provider, if any. + lldb::SyntheticFrameProviderSP m_frame_provider_sp; + private: bool m_extended_info_fetched; // Have we tried to retrieve the m_extended_info // for this thread? 
diff --git a/lldb/include/lldb/Target/ThreadSpec.h b/lldb/include/lldb/Target/ThreadSpec.h index 7c7c832741196..63f8f8b5ec181 100644 --- a/lldb/include/lldb/Target/ThreadSpec.h +++ b/lldb/include/lldb/Target/ThreadSpec.h @@ -34,6 +34,8 @@ class ThreadSpec { public: ThreadSpec(); + ThreadSpec(Thread &thread); + static std::unique_ptr CreateFromStructuredData(const StructuredData::Dictionary &data_dict, Status &error); diff --git a/lldb/include/lldb/Utility/ScriptedMetadata.h b/lldb/include/lldb/Utility/ScriptedMetadata.h index 69c83edce909a..8523c95429718 100644 --- a/lldb/include/lldb/Utility/ScriptedMetadata.h +++ b/lldb/include/lldb/Utility/ScriptedMetadata.h @@ -10,7 +10,9 @@ #define LLDB_INTERPRETER_SCRIPTEDMETADATA_H #include "lldb/Utility/ProcessInfo.h" +#include "lldb/Utility/StreamString.h" #include "lldb/Utility/StructuredData.h" +#include "llvm/ADT/Hashing.h" namespace lldb_private { class ScriptedMetadata { @@ -27,11 +29,36 @@ class ScriptedMetadata { } } + ScriptedMetadata(const ScriptedMetadata &other) + : m_class_name(other.m_class_name), m_args_sp(other.m_args_sp) {} + explicit operator bool() const { return !m_class_name.empty(); } llvm::StringRef GetClassName() const { return m_class_name; } StructuredData::DictionarySP GetArgsSP() const { return m_args_sp; } + /// Get a unique identifier for this metadata based on its contents. + /// The ID is computed from the class name and arguments dictionary, + /// not from the pointer address, so two metadata objects with the same + /// contents will have the same ID. + uint32_t GetID() const { + if (m_class_name.empty()) + return 0; + + // Hash the class name. + llvm::hash_code hash = llvm::hash_value(m_class_name); + + // Hash the arguments dictionary if present. + if (m_args_sp) { + StreamString ss; + m_args_sp->GetDescription(ss); + hash = llvm::hash_combine(hash, llvm::hash_value(ss.GetData())); + } + + // Return the lower 32 bits of the hash. 
+ return static_cast(hash); + } + private: std::string m_class_name; StructuredData::DictionarySP m_args_sp; diff --git a/lldb/include/lldb/ValueObject/ValueObjectSynthetic.h b/lldb/include/lldb/ValueObject/ValueObjectSynthetic.h index 063d796ee4eec..1a82fd78bbba3 100644 --- a/lldb/include/lldb/ValueObject/ValueObjectSynthetic.h +++ b/lldb/include/lldb/ValueObject/ValueObjectSynthetic.h @@ -123,6 +123,11 @@ class ValueObjectSynthetic : public ValueObject { void SetLanguageFlags(uint64_t flags) override; + void + GetExpressionPath(Stream &stream, + GetExpressionPathFormat epformat = + eGetExpressionPathFormatDereferencePointers) override; + protected: bool UpdateValue() override; diff --git a/lldb/include/lldb/lldb-private-interfaces.h b/lldb/include/lldb/lldb-private-interfaces.h index 5fc5c14c52f9e..52806eea190a7 100644 --- a/lldb/include/lldb/lldb-private-interfaces.h +++ b/lldb/include/lldb/lldb-private-interfaces.h @@ -26,7 +26,7 @@ class Value; namespace lldb_private { class ScriptedInterfaceUsages; -struct SyntheticFrameProviderDescriptor; +struct ScriptedFrameProviderDescriptor; typedef lldb::ABISP (*ABICreateInstance)(lldb::ProcessSP process_sp, const ArchSpec &arch); typedef std::unique_ptr (*ArchitectureCreateInstance)( @@ -91,7 +91,7 @@ typedef lldb::ScriptInterpreterSP (*ScriptInterpreterCreateInstance)( typedef llvm::Expected ( *ScriptedFrameProviderCreateInstance)( lldb::StackFrameListSP input_frames, - const lldb_private::SyntheticFrameProviderDescriptor &descriptor); + const lldb_private::ScriptedFrameProviderDescriptor &descriptor); typedef llvm::Expected ( *SyntheticFrameProviderCreateInstance)( lldb::StackFrameListSP input_frames, diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 578a7bdf7433d..78c2d49d647b5 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -23,6 +23,7 @@ #include "lldb/API/SBStringList.h" #include "lldb/API/SBStructuredData.h" #include "lldb/API/SBSymbolContextList.h" 
+#include "lldb/API/SBThreadCollection.h" #include "lldb/API/SBTrace.h" #include "lldb/Breakpoint/BreakpointID.h" #include "lldb/Breakpoint/BreakpointIDList.h" @@ -39,6 +40,7 @@ #include "lldb/Core/Section.h" #include "lldb/Core/StructuredDataImpl.h" #include "lldb/Host/Host.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" #include "lldb/Symbol/DeclVendor.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Symbol/SymbolFile.h" @@ -50,6 +52,7 @@ #include "lldb/Target/LanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/StackFrame.h" +#include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Target/Target.h" #include "lldb/Target/TargetList.h" #include "lldb/Utility/ArchSpec.h" @@ -59,6 +62,7 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/ProcessInfo.h" #include "lldb/Utility/RegularExpression.h" +#include "lldb/Utility/ScriptedMetadata.h" #include "lldb/ValueObject/ValueObjectConstResult.h" #include "lldb/ValueObject/ValueObjectList.h" #include "lldb/ValueObject/ValueObjectVariable.h" @@ -2435,3 +2439,81 @@ lldb::SBMutex SBTarget::GetAPIMutex() const { return lldb::SBMutex(target_sp); return lldb::SBMutex(); } + +uint32_t +SBTarget::RegisterScriptedFrameProvider(const char *class_name, + lldb::SBStructuredData args_dict, + lldb::SBError &error) { + LLDB_INSTRUMENT_VA(this, class_name, args_dict, error); + + TargetSP target_sp = GetSP(); + if (!target_sp) { + error.SetErrorString("invalid target"); + return 0; + } + + if (!class_name || !class_name[0]) { + error.SetErrorString("invalid class name"); + return 0; + } + + // Extract the dictionary from SBStructuredData. 
+ StructuredData::DictionarySP dict_sp; + if (args_dict.IsValid() && args_dict.m_impl_up) { + StructuredData::ObjectSP obj_sp = args_dict.m_impl_up->GetObjectSP(); + if (obj_sp && obj_sp->GetType() != lldb::eStructuredDataTypeDictionary) { + error.SetErrorString("SBStructuredData argument isn't a dictionary"); + return 0; + } + dict_sp = std::make_shared(obj_sp); + } + + // Create the ScriptedMetadata. + ScriptedMetadataSP metadata_sp = + std::make_shared(class_name, dict_sp); + + // Create the interface for calling static methods. + ScriptedFrameProviderInterfaceSP interface_sp = + target_sp->GetDebugger() + .GetScriptInterpreter() + ->CreateScriptedFrameProviderInterface(); + + // Create a descriptor (applies to all threads by default). + ScriptedFrameProviderDescriptor descriptor(metadata_sp); + descriptor.interface_sp = interface_sp; + + llvm::Expected descriptor_id_or_err = + target_sp->AddScriptedFrameProviderDescriptor(descriptor); + if (!descriptor_id_or_err) { + error.SetErrorString( + llvm::toString(descriptor_id_or_err.takeError()).c_str()); + return 0; + } + + // Return the id of the descriptor registered with the target.
+ return *descriptor_id_or_err; +} + +lldb::SBError SBTarget::RemoveScriptedFrameProvider(uint32_t provider_id) { + LLDB_INSTRUMENT_VA(this, provider_id); + + SBError error; + TargetSP target_sp = GetSP(); + if (!target_sp) { + error.SetErrorString("invalid target"); + return error; + } + + if (!provider_id) { + error.SetErrorString("invalid provider id"); + return error; + } + + if (!target_sp->RemoveScriptedFrameProviderDescriptor(provider_id)) { + error.SetErrorStringWithFormat("no frame provider named '%u' found", + provider_id); + return error; + } + + return {}; +} diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 7f880d223d6c3..6e8c94fa234cd 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -51,6 +51,7 @@ #include "lldb/Utility/ConstString.h" #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/ScriptedMetadata.h" #include "lldb/Utility/State.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StructuredData.h" @@ -5402,6 +5403,202 @@ class CommandObjectTargetDump : public CommandObjectMultiword { ~CommandObjectTargetDump() override = default; }; +#pragma mark CommandObjectTargetFrameProvider + +#define LLDB_OPTIONS_target_frame_provider_register +#include "CommandOptions.inc" + +class CommandObjectTargetFrameProviderRegister : public CommandObjectParsed { +public: + CommandObjectTargetFrameProviderRegister(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "target frame-provider register", + "Register frame provider for all threads in this target.", nullptr, + eCommandRequiresTarget), + + m_class_options("target frame-provider", true, 'C', 'k', 'v', 0) { + m_all_options.Append(&m_class_options, LLDB_OPT_SET_1 | LLDB_OPT_SET_2, + LLDB_OPT_SET_ALL); + m_all_options.Finalize(); + + AddSimpleArgumentList(eArgTypeRunArgs, eArgRepeatOptional); + } + + 
~CommandObjectTargetFrameProviderRegister() override = default; + + Options *GetOptions() override { return &m_all_options; } + + std::optional GetRepeatCommand(Args &current_command_args, + uint32_t index) override { + return std::string(""); + } + +protected: + void DoExecute(Args &launch_args, CommandReturnObject &result) override { + ScriptedMetadataSP metadata_sp = std::make_shared( + m_class_options.GetName(), m_class_options.GetStructuredData()); + + Target *target = m_exe_ctx.GetTargetPtr(); + if (!target) + target = &GetDebugger().GetDummyTarget(); + + // Create the interface for calling static methods. + ScriptedFrameProviderInterfaceSP interface_sp = + GetDebugger() + .GetScriptInterpreter() + ->CreateScriptedFrameProviderInterface(); + + // Create a descriptor from the metadata (applies to all threads by + // default). + ScriptedFrameProviderDescriptor descriptor(metadata_sp); + descriptor.interface_sp = interface_sp; + + auto id_or_err = target->AddScriptedFrameProviderDescriptor(descriptor); + if (!id_or_err) { + result.SetError(id_or_err.takeError()); + return; + } + + result.AppendMessageWithFormat( + "successfully registered scripted frame provider '%s' for target\n", + m_class_options.GetName().c_str()); + } + + OptionGroupPythonClassWithDict m_class_options; + OptionGroupOptions m_all_options; +}; + +class CommandObjectTargetFrameProviderClear : public CommandObjectParsed { +public: + CommandObjectTargetFrameProviderClear(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "target frame-provider clear", + "Clear all registered frame providers from this target.", nullptr, + eCommandRequiresTarget) {} + + ~CommandObjectTargetFrameProviderClear() override = default; + +protected: + void DoExecute(Args &command, CommandReturnObject &result) override { + Target *target = m_exe_ctx.GetTargetPtr(); + if (!target) { + result.AppendError("invalid target"); + return; + } + + target->ClearScriptedFrameProviderDescriptors(); + + 
result.SetStatus(eReturnStatusSuccessFinishResult); + } +}; + +class CommandObjectTargetFrameProviderList : public CommandObjectParsed { +public: + CommandObjectTargetFrameProviderList(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "target frame-provider list", + "List all registered frame providers for the target.", nullptr, + eCommandRequiresTarget) {} + + ~CommandObjectTargetFrameProviderList() override = default; + +protected: + void DoExecute(Args &command, CommandReturnObject &result) override { + Target *target = m_exe_ctx.GetTargetPtr(); + if (!target) + target = &GetDebugger().GetDummyTarget(); + + const auto &descriptors = target->GetScriptedFrameProviderDescriptors(); + if (descriptors.empty()) { + result.AppendMessage("no frame providers registered for this target."); + result.SetStatus(eReturnStatusSuccessFinishResult); + return; + } + + result.AppendMessageWithFormat("%u frame provider(s) registered:\n\n", + descriptors.size()); + + for (const auto &entry : descriptors) { + const ScriptedFrameProviderDescriptor &descriptor = entry.second; + descriptor.Dump(&result.GetOutputStream()); + result.GetOutputStream().PutChar('\n'); + } + + result.SetStatus(eReturnStatusSuccessFinishResult); + } +}; + +class CommandObjectTargetFrameProviderRemove : public CommandObjectParsed { +public: + CommandObjectTargetFrameProviderRemove(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "target frame-provider remove", + "Remove a registered frame provider from the target by id.", + "target frame-provider remove ", + eCommandRequiresTarget) { + AddSimpleArgumentList(eArgTypeUnsignedInteger, eArgRepeatPlus); + } + + ~CommandObjectTargetFrameProviderRemove() override = default; + +protected: + void DoExecute(Args &command, CommandReturnObject &result) override { + Target *target = m_exe_ctx.GetTargetPtr(); + if (!target) + target = &GetDebugger().GetDummyTarget(); + + std::vector removed_provider_ids; + for (size_t i = 0; i < 
command.GetArgumentCount(); i++) { + uint32_t provider_id = 0; + if (!llvm::to_integer(command[i].ref(), provider_id)) { + result.AppendError("target frame-provider remove requires integer " + "provider id argument"); + return; + } + + if (!target->RemoveScriptedFrameProviderDescriptor(provider_id)) { + result.AppendErrorWithFormat( + "no frame provider named '%u' found in target\n", provider_id); + return; + } + removed_provider_ids.push_back(provider_id); + } + + if (size_t num_removed_providers = removed_provider_ids.size()) { + result.AppendMessageWithFormat( + "Successfully removed %zu frame-providers.\n", num_removed_providers); + result.SetStatus(eReturnStatusSuccessFinishNoResult); + } else { + result.AppendError("0 frame providers removed.\n"); + } + } +}; + +class CommandObjectTargetFrameProvider : public CommandObjectMultiword { +public: + CommandObjectTargetFrameProvider(CommandInterpreter &interpreter) + : CommandObjectMultiword( + interpreter, "target frame-provider", + "Commands for registering and viewing frame providers for the " + "target.", + "target frame-provider [] ") { + LoadSubCommand("register", + CommandObjectSP(new CommandObjectTargetFrameProviderRegister( + interpreter))); + LoadSubCommand("clear", + CommandObjectSP( + new CommandObjectTargetFrameProviderClear(interpreter))); + LoadSubCommand( + "list", + CommandObjectSP(new CommandObjectTargetFrameProviderList(interpreter))); + LoadSubCommand( + "remove", CommandObjectSP( + new CommandObjectTargetFrameProviderRemove(interpreter))); + } + + ~CommandObjectTargetFrameProvider() override = default; +}; + #pragma mark CommandObjectMultiwordTarget // CommandObjectMultiwordTarget @@ -5417,6 +5614,9 @@ CommandObjectMultiwordTarget::CommandObjectMultiwordTarget( CommandObjectSP(new CommandObjectTargetDelete(interpreter))); LoadSubCommand("dump", CommandObjectSP(new CommandObjectTargetDump(interpreter))); + LoadSubCommand( + "frame-provider", + CommandObjectSP(new 
CommandObjectTargetFrameProvider(interpreter))); LoadSubCommand("list", CommandObjectSP(new CommandObjectTargetList(interpreter))); LoadSubCommand("select", diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp index 491f5c6320d97..c528a14fa76d0 100644 --- a/lldb/source/Core/FormatEntity.cpp +++ b/lldb/source/Core/FormatEntity.cpp @@ -27,6 +27,7 @@ #include "lldb/Symbol/Symbol.h" #include "lldb/Symbol/SymbolContext.h" #include "lldb/Symbol/VariableList.h" +#include "lldb/Target/BorrowedStackFrame.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Target/ExecutionContextScope.h" #include "lldb/Target/Language.h" @@ -109,6 +110,7 @@ constexpr Definition g_frame_child_entries[] = { g_string_entry), Definition("is-artificial", EntryType::FrameIsArtificial), Definition("kind", EntryType::FrameKind), + Definition("borrowed-info", EntryType::FrameBorrowedInfo), }; constexpr Definition g_function_child_entries[] = { @@ -382,6 +384,7 @@ const char *FormatEntity::Entry::TypeToCString(Type t) { ENUM_TO_CSTR(FrameRegisterByName); ENUM_TO_CSTR(FrameIsArtificial); ENUM_TO_CSTR(FrameKind); + ENUM_TO_CSTR(FrameBorrowedInfo); ENUM_TO_CSTR(ScriptFrame); ENUM_TO_CSTR(FunctionID); ENUM_TO_CSTR(FunctionDidChange); @@ -1761,6 +1764,22 @@ bool FormatEntity::Format(const Entry &entry, Stream &s, return false; } + case Entry::Type::FrameBorrowedInfo: { + if (exe_ctx) + if (StackFrame *frame = exe_ctx->GetFramePtr()) { + if (BorrowedStackFrame *borrowed_frame = + llvm::dyn_cast(frame)) { + if (lldb::StackFrameSP borrowed_from_sp = + borrowed_frame->GetBorrowedFrame()) { + s.Printf(" [borrowed from frame #%u]", + borrowed_from_sp->GetFrameIndex()); + return true; + } + } + } + return false; + } + case Entry::Type::ScriptFrame: if (exe_ctx) { StackFrame *frame = exe_ctx->GetFramePtr(); diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index f4d1070d0a785..364b2ecadadd4 100644 --- 
a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -879,11 +879,11 @@ static Scalar DerefSizeExtractDataHelper(uint8_t *addr_bytes, return addr_data.GetAddress(&addr_data_offset); } -static llvm::Error Evaluate_DW_OP_deref_size(DWARFExpression::Stack &stack, - ExecutionContext *exe_ctx, - lldb::ModuleSP module_sp, - Process *process, Target *target, - uint8_t size) { +static llvm::Error Evaluate_DW_OP_deref_size( + DWARFExpression::Stack &stack, ExecutionContext *exe_ctx, + lldb::ModuleSP module_sp, Process *process, Target *target, uint8_t size, + size_t size_addr_bytes, + LocationDescriptionKind &dwarf4_location_description_kind) { if (stack.empty()) return llvm::createStringError( "expression stack empty for DW_OP_deref_size"); @@ -892,6 +892,25 @@ static llvm::Error Evaluate_DW_OP_deref_size(DWARFExpression::Stack &stack, return llvm::createStringError( "Invalid address size for DW_OP_deref_size: %d\n", size); + // Deref a register or implicit location and truncate the value to `size` + // bytes. See the corresponding comment in DW_OP_deref for more details on + // why we deref these locations this way. + if (dwarf4_location_description_kind == Register || + dwarf4_location_description_kind == Implicit) { + // Reset context to default values. + dwarf4_location_description_kind = Memory; + stack.back().ClearContext(); + + // Truncate the value on top of the stack to *size* bytes then + // extend to the size of an address (e.g. generic type). + Scalar scalar = stack.back().GetScalar(); + scalar.TruncOrExtendTo(size * 8, /*sign=*/false); + scalar.TruncOrExtendTo(size_addr_bytes * 8, + /*sign=*/false); + stack.back().GetScalar() = scalar; + return llvm::Error::success(); + } + Value::ValueType value_type = stack.back().GetValueType(); switch (value_type) { case Value::ValueType::HostAddress: { @@ -1142,8 +1161,9 @@ llvm::Expected DWARFExpression::Evaluate( // target machine. 
case DW_OP_deref: { size_t size = opcodes.GetAddressByteSize(); - if (llvm::Error err = Evaluate_DW_OP_deref_size(stack, exe_ctx, module_sp, - process, target, size)) + if (llvm::Error err = Evaluate_DW_OP_deref_size( + stack, exe_ctx, module_sp, process, target, size, size, + dwarf4_location_description_kind)) return err; } break; @@ -1161,8 +1181,9 @@ llvm::Expected DWARFExpression::Evaluate( // expression stack. case DW_OP_deref_size: { size_t size = opcodes.GetU8(&offset); - if (llvm::Error err = Evaluate_DW_OP_deref_size(stack, exe_ctx, module_sp, - process, target, size)) + if (llvm::Error err = Evaluate_DW_OP_deref_size( + stack, exe_ctx, module_sp, process, target, size, + opcodes.GetAddressByteSize(), dwarf4_location_description_kind)) return err; } break; diff --git a/lldb/source/Interpreter/ScriptInterpreter.cpp b/lldb/source/Interpreter/ScriptInterpreter.cpp index d2fd372bfe9e3..7bad10ff3ea61 100644 --- a/lldb/source/Interpreter/ScriptInterpreter.cpp +++ b/lldb/source/Interpreter/ScriptInterpreter.cpp @@ -106,6 +106,13 @@ ScriptInterpreter::GetStatusFromSBError(const lldb::SBError &error) const { return Status(); } +lldb::ThreadSP ScriptInterpreter::GetOpaqueTypeFromSBThread( + const lldb::SBThread &thread) const { + if (thread.m_opaque_sp) + return thread.m_opaque_sp->GetThreadSP(); + return nullptr; +} + lldb::StackFrameSP ScriptInterpreter::GetOpaqueTypeFromSBFrame(const lldb::SBFrame &frame) const { if (frame.m_opaque_sp) diff --git a/lldb/source/Plugins/CMakeLists.txt b/lldb/source/Plugins/CMakeLists.txt index 08f444e7b15e8..b6878b21ff71a 100644 --- a/lldb/source/Plugins/CMakeLists.txt +++ b/lldb/source/Plugins/CMakeLists.txt @@ -22,6 +22,7 @@ add_subdirectory(SymbolFile) add_subdirectory(SystemRuntime) add_subdirectory(SymbolLocator) add_subdirectory(SymbolVendor) +add_subdirectory(SyntheticFrameProvider) add_subdirectory(Trace) add_subdirectory(TraceExporter) add_subdirectory(TypeSystem) diff --git 
a/lldb/source/Plugins/Process/scripted/ScriptedFrame.cpp b/lldb/source/Plugins/Process/scripted/ScriptedFrame.cpp index 6519df9185df0..265bc28a8957f 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedFrame.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedFrame.cpp @@ -7,42 +7,72 @@ //===----------------------------------------------------------------------===// #include "ScriptedFrame.h" - +#include "Plugins/Process/Utility/RegisterContextMemory.h" + +#include "lldb/Core/Address.h" +#include "lldb/Core/Debugger.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedThreadInterface.h" +#include "lldb/Interpreter/ScriptInterpreter.h" +#include "lldb/Symbol/SymbolContext.h" +#include "lldb/Target/ExecutionContext.h" +#include "lldb/Target/Process.h" +#include "lldb/Target/RegisterContext.h" +#include "lldb/Target/Thread.h" #include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/StructuredData.h" using namespace lldb; using namespace lldb_private; +char ScriptedFrame::ID; + void ScriptedFrame::CheckInterpreterAndScriptObject() const { lldbassert(m_script_object_sp && "Invalid Script Object."); lldbassert(GetInterface() && "Invalid Scripted Frame Interface."); } llvm::Expected> -ScriptedFrame::Create(ScriptedThread &thread, +ScriptedFrame::Create(ThreadSP thread_sp, + ScriptedThreadInterfaceSP scripted_thread_interface_sp, StructuredData::DictionarySP args_sp, StructuredData::Generic *script_object) { - if (!thread.IsValid()) - return llvm::createStringError("Invalid scripted thread."); + if (!thread_sp || !thread_sp->IsValid()) + return llvm::createStringError("invalid thread"); + + ProcessSP process_sp = thread_sp->GetProcess(); + if (!process_sp || !process_sp->IsValid()) + return llvm::createStringError("invalid process"); - thread.CheckInterpreterAndScriptObject(); + ScriptInterpreter *script_interp = + 
process_sp->GetTarget().GetDebugger().GetScriptInterpreter(); + if (!script_interp) + return llvm::createStringError("no script interpreter"); - auto scripted_frame_interface = - thread.GetInterface()->CreateScriptedFrameInterface(); + auto scripted_frame_interface = script_interp->CreateScriptedFrameInterface(); if (!scripted_frame_interface) return llvm::createStringError("failed to create scripted frame interface"); llvm::StringRef frame_class_name; if (!script_object) { - std::optional class_name = - thread.GetInterface()->GetScriptedFramePluginName(); - if (!class_name || class_name->empty()) + // If no script object is provided and we have a scripted thread interface, + // try to get the frame class name from it. + if (scripted_thread_interface_sp) { + std::optional class_name = + scripted_thread_interface_sp->GetScriptedFramePluginName(); + if (!class_name || class_name->empty()) + return llvm::createStringError( + "failed to get scripted frame class name"); + frame_class_name = *class_name; + } else { return llvm::createStringError( - "failed to get scripted thread class name"); - frame_class_name = *class_name; + "no script object provided and no scripted thread interface"); + } } - ExecutionContext exe_ctx(thread); + ExecutionContext exe_ctx(thread_sp); auto obj_or_err = scripted_frame_interface->CreatePluginObject( frame_class_name, exe_ctx, args_sp, script_object); @@ -62,7 +92,7 @@ ScriptedFrame::Create(ScriptedThread &thread, SymbolContext sc; Address symbol_addr; if (pc != LLDB_INVALID_ADDRESS) { - symbol_addr.SetLoadAddress(pc, &thread.GetProcess()->GetTarget()); + symbol_addr.SetLoadAddress(pc, &process_sp->GetTarget()); symbol_addr.CalculateSymbolContext(&sc); } @@ -77,11 +107,11 @@ ScriptedFrame::Create(ScriptedThread &thread, if (!reg_info) return llvm::createStringError( - "failed to get scripted thread registers info"); + "failed to get scripted frame registers info"); std::shared_ptr register_info_sp = - DynamicRegisterInfo::Create( - 
*reg_info, thread.GetProcess()->GetTarget().GetArchitecture()); + DynamicRegisterInfo::Create(*reg_info, + process_sp->GetTarget().GetArchitecture()); lldb::RegisterContextSP reg_ctx_sp; @@ -96,32 +126,35 @@ ScriptedFrame::Create(ScriptedThread &thread, std::shared_ptr reg_ctx_memory = std::make_shared( - thread, frame_id, *register_info_sp, LLDB_INVALID_ADDRESS); + *thread_sp, frame_id, *register_info_sp, LLDB_INVALID_ADDRESS); if (!reg_ctx_memory) - return llvm::createStringError("failed to create a register context."); + return llvm::createStringError("failed to create a register context"); reg_ctx_memory->SetAllRegisterData(data_sp); reg_ctx_sp = reg_ctx_memory; } return std::make_shared( - thread, scripted_frame_interface, frame_id, pc, sc, reg_ctx_sp, + thread_sp, scripted_frame_interface, frame_id, pc, sc, reg_ctx_sp, register_info_sp, owned_script_object_sp); } -ScriptedFrame::ScriptedFrame(ScriptedThread &thread, +ScriptedFrame::ScriptedFrame(ThreadSP thread_sp, ScriptedFrameInterfaceSP interface_sp, lldb::user_id_t id, lldb::addr_t pc, SymbolContext &sym_ctx, lldb::RegisterContextSP reg_ctx_sp, std::shared_ptr reg_info_sp, StructuredData::GenericSP script_object_sp) - : StackFrame(thread.shared_from_this(), /*frame_idx=*/id, + : StackFrame(thread_sp, /*frame_idx=*/id, /*concrete_frame_idx=*/id, /*reg_context_sp=*/reg_ctx_sp, /*cfa=*/0, /*pc=*/pc, /*behaves_like_zeroth_frame=*/!id, /*symbol_ctx=*/&sym_ctx), m_scripted_frame_interface_sp(interface_sp), - m_script_object_sp(script_object_sp), m_register_info_sp(reg_info_sp) {} + m_script_object_sp(script_object_sp), m_register_info_sp(reg_info_sp) { + // FIXME: This should be part of the base class constructor. 
+ m_stack_frame_kind = StackFrame::Kind::Synthetic; +} ScriptedFrame::~ScriptedFrame() {} @@ -164,7 +197,7 @@ std::shared_ptr ScriptedFrame::GetDynamicRegisterInfo() { if (!reg_info) return ScriptedInterface::ErrorWithMessage< std::shared_ptr>( - LLVM_PRETTY_FUNCTION, "Failed to get scripted frame registers info.", + LLVM_PRETTY_FUNCTION, "failed to get scripted frame registers info", error, LLDBLog::Thread); ThreadSP thread_sp = m_thread_wp.lock(); @@ -172,7 +205,7 @@ std::shared_ptr ScriptedFrame::GetDynamicRegisterInfo() { return ScriptedInterface::ErrorWithMessage< std::shared_ptr>( LLVM_PRETTY_FUNCTION, - "Failed to get scripted frame registers info: invalid thread.", error, + "failed to get scripted frame registers info: invalid thread", error, LLDBLog::Thread); ProcessSP process_sp = thread_sp->GetProcess(); @@ -180,8 +213,8 @@ std::shared_ptr ScriptedFrame::GetDynamicRegisterInfo() { return ScriptedInterface::ErrorWithMessage< std::shared_ptr>( LLVM_PRETTY_FUNCTION, - "Failed to get scripted frame registers info: invalid process.", - error, LLDBLog::Thread); + "failed to get scripted frame registers info: invalid process", error, + LLDBLog::Thread); m_register_info_sp = DynamicRegisterInfo::Create( *reg_info, process_sp->GetTarget().GetArchitecture()); diff --git a/lldb/source/Plugins/Process/scripted/ScriptedFrame.h b/lldb/source/Plugins/Process/scripted/ScriptedFrame.h index b6b77c4a7d160..d1cbd429d4979 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedFrame.h +++ b/lldb/source/Plugins/Process/scripted/ScriptedFrame.h @@ -10,21 +10,19 @@ #define LLDB_SOURCE_PLUGINS_SCRIPTED_FRAME_H #include "ScriptedThread.h" -#include "lldb/Interpreter/ScriptInterpreter.h" #include "lldb/Target/DynamicRegisterInfo.h" #include "lldb/Target/StackFrame.h" +#include "lldb/lldb-forward.h" +#include "llvm/Support/Error.h" +#include #include -namespace lldb_private { -class ScriptedThread; -} - namespace lldb_private { class ScriptedFrame : public 
lldb_private::StackFrame { public: - ScriptedFrame(ScriptedThread &thread, + ScriptedFrame(lldb::ThreadSP thread_sp, lldb::ScriptedFrameInterfaceSP interface_sp, lldb::user_id_t frame_idx, lldb::addr_t pc, SymbolContext &sym_ctx, lldb::RegisterContextSP reg_ctx_sp, @@ -33,8 +31,29 @@ class ScriptedFrame : public lldb_private::StackFrame { ~ScriptedFrame() override; + /// Create a ScriptedFrame from an object instantiated in the script + /// interpreter. + /// + /// \param[in] thread_sp + /// The thread this frame belongs to. + /// + /// \param[in] scripted_thread_interface_sp + /// The scripted thread interface (needed for ScriptedThread + /// compatibility). Can be nullptr for frames on real threads. + /// + /// \param[in] args_sp + /// Arguments to pass to the frame creation. + /// + /// \param[in] script_object + /// The optional script object representing this frame. + /// + /// \return + /// An Expected containing the ScriptedFrame shared pointer if successful, + /// otherwise an error. 
static llvm::Expected> - Create(ScriptedThread &thread, StructuredData::DictionarySP args_sp, + Create(lldb::ThreadSP thread_sp, + lldb::ScriptedThreadInterfaceSP scripted_thread_interface_sp, + StructuredData::DictionarySP args_sp, StructuredData::Generic *script_object = nullptr); bool IsInlined() override; @@ -43,6 +62,11 @@ class ScriptedFrame : public lldb_private::StackFrame { const char *GetFunctionName() override; const char *GetDisplayFunctionName() override; + bool isA(const void *ClassID) const override { + return ClassID == &ID || StackFrame::isA(ClassID); + } + static bool classof(const StackFrame *obj) { return obj->isA(&ID); } + private: void CheckInterpreterAndScriptObject() const; lldb::ScriptedFrameInterfaceSP GetInterface() const; @@ -55,6 +79,8 @@ class ScriptedFrame : public lldb_private::StackFrame { lldb::ScriptedFrameInterfaceSP m_scripted_frame_interface_sp; lldb_private::StructuredData::GenericSP m_script_object_sp; std::shared_ptr m_register_info_sp; + + static char ID; }; } // namespace lldb_private diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp index 491efac5aadef..1dd9c48f56a59 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp @@ -210,7 +210,7 @@ bool ScriptedThread::LoadArtificialStackFrames() { SymbolContext sc; symbol_addr.CalculateSymbolContext(&sc); - return std::make_shared(this->shared_from_this(), idx, idx, cfa, + return std::make_shared(shared_from_this(), idx, idx, cfa, cfa_is_valid, pc, StackFrame::Kind::Synthetic, artificial, behaves_like_zeroth_frame, &sc); @@ -231,8 +231,8 @@ bool ScriptedThread::LoadArtificialStackFrames() { return error.ToError(); } - auto frame_or_error = - ScriptedFrame::Create(*this, nullptr, object_sp->GetAsGeneric()); + auto frame_or_error = ScriptedFrame::Create( + shared_from_this(), GetInterface(), nullptr, object_sp->GetAsGeneric()); if 
(!frame_or_error) { ScriptedInterface::ErrorWithMessage( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.cpp index d43036d6fe544..f6c707b2bd168 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.cpp @@ -31,6 +31,7 @@ void ScriptInterpreterPythonInterfaces::Initialize() { ScriptedStopHookPythonInterface::Initialize(); ScriptedBreakpointPythonInterface::Initialize(); ScriptedThreadPlanPythonInterface::Initialize(); + ScriptedFrameProviderPythonInterface::Initialize(); } void ScriptInterpreterPythonInterfaces::Terminate() { @@ -40,6 +41,7 @@ void ScriptInterpreterPythonInterfaces::Terminate() { ScriptedStopHookPythonInterface::Terminate(); ScriptedBreakpointPythonInterface::Terminate(); ScriptedThreadPlanPythonInterface::Terminate(); + ScriptedFrameProviderPythonInterface::Terminate(); } #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp index b866bf332b7b6..3dde5036453f4 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "lldb/Core/PluginManager.h" #include "lldb/Host/Config.h" #include "lldb/Target/Thread.h" #include "lldb/Utility/Log.h" @@ -30,18 +31,45 @@ ScriptedFrameProviderPythonInterface::ScriptedFrameProviderPythonInterface( ScriptInterpreterPythonImpl &interpreter) : ScriptedFrameProviderInterface(), 
ScriptedPythonInterface(interpreter) {} +bool ScriptedFrameProviderPythonInterface::AppliesToThread( + llvm::StringRef class_name, lldb::ThreadSP thread_sp) { + // If there is any issue with this method, we will just assume it also applies + // to this thread which is the default behavior. + constexpr bool fail_value = true; + Status error; + StructuredData::ObjectSP obj = + CallStaticMethod(class_name, "applies_to_thread", error, thread_sp); + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) + return fail_value; + + return obj->GetBooleanValue(fail_value); +} + llvm::Expected ScriptedFrameProviderPythonInterface::CreatePluginObject( const llvm::StringRef class_name, lldb::StackFrameListSP input_frames, StructuredData::DictionarySP args_sp) { if (!input_frames) - return llvm::createStringError("Invalid frame list"); + return llvm::createStringError("invalid frame list"); StructuredDataImpl sd_impl(args_sp); return ScriptedPythonInterface::CreatePluginObject(class_name, nullptr, input_frames, sd_impl); } +std::string ScriptedFrameProviderPythonInterface::GetDescription( + llvm::StringRef class_name) { + Status error; + StructuredData::ObjectSP obj = + CallStaticMethod(class_name, "get_description", error); + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) + return {}; + + return obj->GetStringValue().str(); +} + StructuredData::ObjectSP ScriptedFrameProviderPythonInterface::GetFrameAtIndex(uint32_t index) { Status error; @@ -54,4 +82,32 @@ ScriptedFrameProviderPythonInterface::GetFrameAtIndex(uint32_t index) { return obj; } +bool ScriptedFrameProviderPythonInterface::CreateInstance( + lldb::ScriptLanguage language, ScriptedInterfaceUsages usages) { + if (language != eScriptLanguagePython) + return false; + + return true; +} + +void ScriptedFrameProviderPythonInterface::Initialize() { + const std::vector ci_usages = { + "target frame-provider register -C [-k key -v value ...]", + "target 
frame-provider list", + "target frame-provider remove ", + "target frame-provider clear"}; + const std::vector api_usages = { + "SBTarget.RegisterScriptedFrameProvider", + "SBTarget.RemoveScriptedFrameProvider", + "SBTarget.ClearScriptedFrameProvider"}; + PluginManager::RegisterPlugin( + GetPluginNameStatic(), + llvm::StringRef("Provide scripted stack frames for threads"), + CreateInstance, eScriptLanguagePython, {ci_usages, api_usages}); +} + +void ScriptedFrameProviderPythonInterface::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h index fd163984028d3..97a5cc7c669ea 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h @@ -14,17 +14,22 @@ #if LLDB_ENABLE_PYTHON #include "ScriptedPythonInterface.h" +#include "lldb/Core/PluginInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" #include namespace lldb_private { class ScriptedFrameProviderPythonInterface : public ScriptedFrameProviderInterface, - public ScriptedPythonInterface { + public ScriptedPythonInterface, + public PluginInterface { public: ScriptedFrameProviderPythonInterface( ScriptInterpreterPythonImpl &interpreter); + bool AppliesToThread(llvm::StringRef class_name, + lldb::ThreadSP thread_sp) override; + llvm::Expected CreatePluginObject(llvm::StringRef class_name, lldb::StackFrameListSP input_frames, @@ -33,10 +38,24 @@ class ScriptedFrameProviderPythonInterface llvm::SmallVector GetAbstractMethodRequirements() const override { return llvm::SmallVector( - {{"get_frame_at_index"}}); + {{"get_description"}, {"get_frame_at_index"}}); } + std::string GetDescription(llvm::StringRef class_name) 
override; + StructuredData::ObjectSP GetFrameAtIndex(uint32_t index) override; + + static void Initialize(); + static void Terminate(); + + static bool CreateInstance(lldb::ScriptLanguage language, + ScriptedInterfaceUsages usages); + + static llvm::StringRef GetPluginNameStatic() { + return "ScriptedFrameProviderPythonInterface"; + } + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } }; } // namespace lldb_private diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp index af2e0b5df4d22..ba4473cf9ec4d 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp @@ -93,6 +93,19 @@ ScriptedPythonInterface::ExtractValueFromPythonObject( return nullptr; } +template <> +lldb::ThreadSP +ScriptedPythonInterface::ExtractValueFromPythonObject( + python::PythonObject &p, Status &error) { + if (lldb::SBThread *sb_thread = reinterpret_cast( + python::LLDBSWIGPython_CastPyObjectToSBThread(p.get()))) + return m_interpreter.GetOpaqueTypeFromSBThread(*sb_thread); + error = Status::FromErrorString( + "Couldn't cast lldb::SBThread to lldb_private::Thread."); + + return nullptr; +} + template <> SymbolContext ScriptedPythonInterface::ExtractValueFromPythonObject( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h index 23c56610124a6..53a7ba65f64b7 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h @@ -387,6 +387,112 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { return m_object_instance_sp; } + /// Call a static method on a 
Python class without creating an instance. + /// + /// This method resolves a Python class by name and calls a static method + /// on it, returning the result. This is useful for calling class-level + /// methods that don't require an instance. + /// + /// \param class_name The fully-qualified name of the Python class. + /// \param method_name The name of the static method to call. + /// \param error Output parameter to receive error information if the call + /// fails. + /// \param args Arguments to pass to the static method. + /// + /// \return The return value of the static method call, or an error value. + template + T CallStaticMethod(llvm::StringRef class_name, llvm::StringRef method_name, + Status &error, Args &&...args) { + using namespace python; + using Locker = ScriptInterpreterPythonImpl::Locker; + + std::string caller_signature = + llvm::Twine(LLVM_PRETTY_FUNCTION + llvm::Twine(" (") + + llvm::Twine(class_name) + llvm::Twine(".") + + llvm::Twine(method_name) + llvm::Twine(")")) + .str(); + + if (class_name.empty()) + return ErrorWithMessage(caller_signature, "missing script class name", + error); + + Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, + Locker::FreeLock); + + // Get the interpreter dictionary. + auto dict = + PythonModule::MainModule().ResolveName( + m_interpreter.GetDictionaryName()); + if (!dict.IsAllocated()) + return ErrorWithMessage( + caller_signature, + llvm::formatv("could not find interpreter dictionary: {0}", + m_interpreter.GetDictionaryName()) + .str(), + error); + + // Resolve the class. + auto class_obj = + PythonObject::ResolveNameWithDictionary( + class_name, dict); + if (!class_obj.IsAllocated()) + return ErrorWithMessage( + caller_signature, + llvm::formatv("could not find script class: {0}", class_name).str(), + error); + + // Get the static method from the class. 
+ if (!class_obj.HasAttribute(method_name)) + return ErrorWithMessage( + caller_signature, + llvm::formatv("class {0} does not have method {1}", class_name, + method_name) + .str(), + error); + + PythonCallable method = + class_obj.GetAttributeValue(method_name).AsType(); + if (!method.IsAllocated()) + return ErrorWithMessage(caller_signature, + llvm::formatv("method {0}.{1} is not callable", + class_name, method_name) + .str(), + error); + + // Transform the arguments. + std::tuple original_args = std::forward_as_tuple(args...); + auto transformed_args = TransformArgs(original_args); + + // Call the static method. + llvm::Expected expected_return_object = + llvm::make_error("Not initialized.", + llvm::inconvertibleErrorCode()); + std::apply( + [&method, &expected_return_object](auto &&...args) { + llvm::consumeError(expected_return_object.takeError()); + expected_return_object = method(args...); + }, + transformed_args); + + if (llvm::Error e = expected_return_object.takeError()) { + error = Status::FromError(std::move(e)); + return ErrorWithMessage( + caller_signature, "python static method could not be called", error); + } + + PythonObject py_return = std::move(expected_return_object.get()); + + // Re-assign reference and pointer arguments if needed. + if (sizeof...(Args) > 0) + if (!ReassignPtrsOrRefsArgs(original_args, transformed_args)) + return ErrorWithMessage( + caller_signature, + "couldn't re-assign reference and pointer arguments", error); + + // Extract value from Python object (handles unallocated case). 
+ return ExtractValueFromPythonObject(py_return, error); + } + protected: template T ExtractValueFromPythonObject(python::PythonObject &p, Status &error) { @@ -403,7 +509,7 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { llvm::Twine(method_name) + llvm::Twine(")")) .str(); if (!m_object_instance_sp) - return ErrorWithMessage(caller_signature, "Python object ill-formed", + return ErrorWithMessage(caller_signature, "python object ill-formed", error); Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, @@ -415,7 +521,7 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { if (!implementor.IsAllocated()) return llvm::is_contained(GetAbstractMethods(), method_name) ? ErrorWithMessage(caller_signature, - "Python implementor not allocated.", + "python implementor not allocated", error) : T{}; @@ -436,20 +542,20 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { if (llvm::Error e = expected_return_object.takeError()) { error = Status::FromError(std::move(e)); return ErrorWithMessage(caller_signature, - "Python method could not be called.", error); + "python method could not be called", error); } PythonObject py_return = std::move(expected_return_object.get()); // Now that we called the python method with the transformed arguments, - // we need to interate again over both the original and transformed + // we need to iterate again over both the original and transformed // parameter pack, and transform back the parameter that were passed in // the original parameter pack as references or pointers. 
if (sizeof...(Args) > 0) if (!ReassignPtrsOrRefsArgs(original_args, transformed_args)) return ErrorWithMessage( caller_signature, - "Couldn't re-assign reference and pointer arguments.", error); + "couldn't re-assign reference and pointer arguments", error); if (!py_return.IsAllocated()) return {}; @@ -655,6 +761,11 @@ lldb::StreamSP ScriptedPythonInterface::ExtractValueFromPythonObject( python::PythonObject &p, Status &error); +template <> +lldb::ThreadSP +ScriptedPythonInterface::ExtractValueFromPythonObject( + python::PythonObject &p, Status &error); + template <> lldb::StackFrameSP ScriptedPythonInterface::ExtractValueFromPythonObject( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h index 2c971262fc34e..32948ffd30023 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h @@ -265,6 +265,7 @@ void *LLDBSWIGPython_CastPyObjectToSBLaunchInfo(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBError(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBEvent(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBStream(PyObject *data); +void *LLDBSWIGPython_CastPyObjectToSBThread(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBFrame(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBSymbolContext(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBValue(PyObject *data); diff --git a/lldb/source/Plugins/SyntheticFrameProvider/CMakeLists.txt b/lldb/source/Plugins/SyntheticFrameProvider/CMakeLists.txt new file mode 100644 index 0000000000000..85b405e648c1f --- /dev/null +++ b/lldb/source/Plugins/SyntheticFrameProvider/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(ScriptedFrameProvider) diff --git a/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/CMakeLists.txt b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/CMakeLists.txt new file mode 
100644 index 0000000000000..fe67d39efdf11 --- /dev/null +++ b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/CMakeLists.txt @@ -0,0 +1,12 @@ +add_lldb_library(lldbPluginScriptedFrameProvider PLUGIN + ScriptedFrameProvider.cpp + + LINK_COMPONENTS + Support + + LINK_LIBS + lldbCore + lldbInterpreter + lldbTarget + lldbUtility + ) diff --git a/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.cpp b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.cpp new file mode 100644 index 0000000000000..739963e6f0c2f --- /dev/null +++ b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.cpp @@ -0,0 +1,221 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ScriptedFrameProvider.h" +#include "Plugins/Process/scripted/ScriptedFrame.h" +#include "lldb/Core/Debugger.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" +#include "lldb/Interpreter/ScriptInterpreter.h" +#include "lldb/Target/BorrowedStackFrame.h" +#include "lldb/Target/Process.h" +#include "lldb/Target/StackFrame.h" +#include "lldb/Target/Thread.h" +#include "lldb/Utility/ScriptedMetadata.h" +#include "lldb/Utility/Status.h" +#include "llvm/Support/Error.h" +#include + +using namespace lldb; +using namespace lldb_private; + +void ScriptedFrameProvider::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + "Provides synthetic frames via scripting", + nullptr, ScriptedFrameProvider::CreateInstance); +} + +void ScriptedFrameProvider::Terminate() { + 
PluginManager::UnregisterPlugin(ScriptedFrameProvider::CreateInstance); +} + +llvm::Expected +ScriptedFrameProvider::CreateInstance( + lldb::StackFrameListSP input_frames, + const ScriptedFrameProviderDescriptor &descriptor) { + if (!input_frames) + return llvm::createStringError( + "failed to create scripted frame provider: invalid input frames"); + + Thread &thread = input_frames->GetThread(); + ProcessSP process_sp = thread.GetProcess(); + if (!process_sp) + return nullptr; + + if (!descriptor.IsValid()) + return llvm::createStringError( + "failed to create scripted frame provider: invalid scripted metadata"); + + if (!descriptor.AppliesToThread(thread)) + return nullptr; + + ScriptInterpreter *script_interp = + process_sp->GetTarget().GetDebugger().GetScriptInterpreter(); + if (!script_interp) + return llvm::createStringError("cannot create scripted frame provider: no " + "script interpreter installed"); + + ScriptedFrameProviderInterfaceSP interface_sp = + script_interp->CreateScriptedFrameProviderInterface(); + if (!interface_sp) + return llvm::createStringError( + "cannot create scripted frame provider: script interpreter couldn't " + "create Scripted Frame Provider Interface"); + + const ScriptedMetadataSP scripted_metadata = descriptor.scripted_metadata_sp; + + // If we shouldn't attach a frame provider to this thread, just exit early. 
+ if (!interface_sp->AppliesToThread(scripted_metadata->GetClassName(), + thread.shared_from_this())) + return nullptr; + + auto obj_or_err = interface_sp->CreatePluginObject( + scripted_metadata->GetClassName(), input_frames, + scripted_metadata->GetArgsSP()); + if (!obj_or_err) + return obj_or_err.takeError(); + + StructuredData::ObjectSP object_sp = *obj_or_err; + if (!object_sp || !object_sp->IsValid()) + return llvm::createStringError( + "cannot create scripted frame provider: failed to create valid scripted " + "frame provider object"); + + return std::make_shared(input_frames, interface_sp, + descriptor); +} + +ScriptedFrameProvider::ScriptedFrameProvider( + StackFrameListSP input_frames, + lldb::ScriptedFrameProviderInterfaceSP interface_sp, + const ScriptedFrameProviderDescriptor &descriptor) + : SyntheticFrameProvider(input_frames), m_interface_sp(interface_sp), + m_descriptor(descriptor) {} + +ScriptedFrameProvider::~ScriptedFrameProvider() = default; + +std::string ScriptedFrameProvider::GetDescription() const { + if (!m_interface_sp) + return {}; + + return m_interface_sp->GetDescription(m_descriptor.GetName()); +} + +llvm::Expected +ScriptedFrameProvider::GetFrameAtIndex(uint32_t idx) { + if (!m_interface_sp) + return llvm::createStringError( + "cannot get stack frame: scripted frame provider not initialized"); + + auto create_frame_from_dict = + [this](StructuredData::Dictionary *dict, + uint32_t index) -> llvm::Expected { + lldb::addr_t pc; + if (!dict->GetValueForKeyAsInteger("pc", pc)) + return llvm::createStringError( + "missing 'pc' key from scripted frame dictionary"); + + Address symbol_addr; + symbol_addr.SetLoadAddress(pc, &GetThread().GetProcess()->GetTarget()); + + const lldb::addr_t cfa = LLDB_INVALID_ADDRESS; + const bool cfa_is_valid = false; + const bool artificial = false; + const bool behaves_like_zeroth_frame = false; + SymbolContext sc; + symbol_addr.CalculateSymbolContext(&sc); + + ThreadSP thread_sp = 
GetThread().shared_from_this(); + return std::make_shared(thread_sp, index, index, cfa, + cfa_is_valid, pc, + StackFrame::Kind::Synthetic, artificial, + behaves_like_zeroth_frame, &sc); + }; + + auto create_frame_from_script_object = + [this]( + StructuredData::ObjectSP object_sp) -> llvm::Expected { + Status error; + if (!object_sp || !object_sp->GetAsGeneric()) + return llvm::createStringError("invalid script object"); + + ThreadSP thread_sp = GetThread().shared_from_this(); + auto frame_or_error = ScriptedFrame::Create(thread_sp, nullptr, nullptr, + object_sp->GetAsGeneric()); + + if (!frame_or_error) { + ScriptedInterface::ErrorWithMessage( + LLVM_PRETTY_FUNCTION, toString(frame_or_error.takeError()), error); + return error.ToError(); + } + + return *frame_or_error; + }; + + StructuredData::ObjectSP obj_sp = m_interface_sp->GetFrameAtIndex(idx); + + // None/null means no more frames or error. + if (!obj_sp || !obj_sp->IsValid()) + return llvm::createStringError("invalid script object returned for frame " + + llvm::Twine(idx)); + + StackFrameSP synth_frame_sp = nullptr; + if (StructuredData::UnsignedInteger *int_obj = + obj_sp->GetAsUnsignedInteger()) { + uint32_t real_frame_index = int_obj->GetValue(); + if (real_frame_index < m_input_frames->GetNumFrames()) { + StackFrameSP real_frame_sp = + m_input_frames->GetFrameAtIndex(real_frame_index); + synth_frame_sp = + (real_frame_index == idx) + ? real_frame_sp + : std::make_shared(real_frame_sp, idx); + } + } else if (StructuredData::Dictionary *dict = obj_sp->GetAsDictionary()) { + // Check if it's a dictionary describing a frame. 
+ auto frame_from_dict_or_err = create_frame_from_dict(dict, idx); + if (!frame_from_dict_or_err) { + return llvm::createStringError(llvm::Twine( + "couldn't create frame from dictionary at index " + llvm::Twine(idx) + + ": " + toString(frame_from_dict_or_err.takeError()))); + } + synth_frame_sp = *frame_from_dict_or_err; + } else if (obj_sp->GetAsGeneric()) { + // It's a ScriptedFrame object. + auto frame_from_script_obj_or_err = create_frame_from_script_object(obj_sp); + if (!frame_from_script_obj_or_err) { + return llvm::createStringError( + llvm::Twine("couldn't create frame from script object at index " + + llvm::Twine(idx) + ": " + + toString(frame_from_script_obj_or_err.takeError()))); + } + synth_frame_sp = *frame_from_script_obj_or_err; + } else { + return llvm::createStringError( + llvm::Twine("invalid return type from get_frame_at_index at index " + + llvm::Twine(idx))); + } + + if (!synth_frame_sp) + return llvm::createStringError( + llvm::Twine("failed to create frame at index " + llvm::Twine(idx))); + + synth_frame_sp->SetFrameIndex(idx); + + return synth_frame_sp; +} + +namespace lldb_private { +void lldb_initialize_ScriptedFrameProvider() { + ScriptedFrameProvider::Initialize(); +} + +void lldb_terminate_ScriptedFrameProvider() { + ScriptedFrameProvider::Terminate(); +} +} // namespace lldb_private diff --git a/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.h b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.h new file mode 100644 index 0000000000000..3434bf26ade24 --- /dev/null +++ b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.h @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_PLUGINS_SYNTHETICFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_H +#define LLDB_PLUGINS_SYNTHETICFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_H + +#include "lldb/Target/SyntheticFrameProvider.h" +#include "lldb/Utility/ScriptedMetadata.h" +#include "lldb/Utility/Status.h" +#include "lldb/lldb-forward.h" +#include "llvm/Support/Error.h" + +namespace lldb_private { + +class ScriptedFrameProvider : public SyntheticFrameProvider { +public: + static llvm::StringRef GetPluginNameStatic() { + return "ScriptedFrameProvider"; + } + + static llvm::Expected + CreateInstance(lldb::StackFrameListSP input_frames, + const ScriptedFrameProviderDescriptor &descriptor); + + static void Initialize(); + + static void Terminate(); + + ScriptedFrameProvider(lldb::StackFrameListSP input_frames, + lldb::ScriptedFrameProviderInterfaceSP interface_sp, + const ScriptedFrameProviderDescriptor &descriptor); + ~ScriptedFrameProvider() override; + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + std::string GetDescription() const override; + + /// Get a single stack frame at the specified index. 
+ llvm::Expected GetFrameAtIndex(uint32_t idx) override; + +private: + lldb::ScriptedFrameProviderInterfaceSP m_interface_sp; + const ScriptedFrameProviderDescriptor &m_descriptor; +}; + +} // namespace lldb_private + +#endif // LLDB_PLUGINS_SYNTHETICFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_H diff --git a/lldb/source/Target/BorrowedStackFrame.cpp b/lldb/source/Target/BorrowedStackFrame.cpp new file mode 100644 index 0000000000000..5afadf21fde03 --- /dev/null +++ b/lldb/source/Target/BorrowedStackFrame.cpp @@ -0,0 +1,187 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Target/BorrowedStackFrame.h" + +using namespace lldb; +using namespace lldb_private; + +char BorrowedStackFrame::ID; + +BorrowedStackFrame::BorrowedStackFrame( + StackFrameSP borrowed_frame_sp, uint32_t new_frame_index, + std::optional new_concrete_frame_index) + : StackFrame( + borrowed_frame_sp->GetThread(), new_frame_index, + borrowed_frame_sp->GetConcreteFrameIndex(), + borrowed_frame_sp->GetRegisterContextSP(), + borrowed_frame_sp->GetStackID().GetPC(), + borrowed_frame_sp->GetStackID().GetCallFrameAddressWithoutMetadata(), + borrowed_frame_sp->m_behaves_like_zeroth_frame, + &borrowed_frame_sp->GetSymbolContext(eSymbolContextEverything)), + m_borrowed_frame_sp(borrowed_frame_sp), + m_new_frame_index(new_frame_index) { + if (new_concrete_frame_index) + m_new_concrete_frame_index = *new_concrete_frame_index; + else + m_new_concrete_frame_index = + IsInlined() ? 
LLDB_INVALID_FRAME_ID : new_frame_index; +} + +uint32_t BorrowedStackFrame::GetFrameIndex() const { return m_new_frame_index; } + +void BorrowedStackFrame::SetFrameIndex(uint32_t index) { + m_new_frame_index = index; +} + +uint32_t BorrowedStackFrame::GetConcreteFrameIndex() { + // FIXME: We need to find where the concrete frame into which this frame was + // inlined landed in the new stack frame list as that is the correct concrete + // frame index in this + // stack frame. + return m_new_concrete_frame_index; +} + +StackID &BorrowedStackFrame::GetStackID() { + return m_borrowed_frame_sp->GetStackID(); +} + +const Address &BorrowedStackFrame::GetFrameCodeAddress() { + return m_borrowed_frame_sp->GetFrameCodeAddress(); +} + +Address BorrowedStackFrame::GetFrameCodeAddressForSymbolication() { + return m_borrowed_frame_sp->GetFrameCodeAddressForSymbolication(); +} + +bool BorrowedStackFrame::ChangePC(addr_t pc) { + return m_borrowed_frame_sp->ChangePC(pc); +} + +const SymbolContext & +BorrowedStackFrame::GetSymbolContext(SymbolContextItem resolve_scope) { + return m_borrowed_frame_sp->GetSymbolContext(resolve_scope); +} + +llvm::Error BorrowedStackFrame::GetFrameBaseValue(Scalar &value) { + return m_borrowed_frame_sp->GetFrameBaseValue(value); +} + +DWARFExpressionList * +BorrowedStackFrame::GetFrameBaseExpression(Status *error_ptr) { + return m_borrowed_frame_sp->GetFrameBaseExpression(error_ptr); +} + +Block *BorrowedStackFrame::GetFrameBlock() { + return m_borrowed_frame_sp->GetFrameBlock(); +} + +RegisterContextSP BorrowedStackFrame::GetRegisterContext() { + return m_borrowed_frame_sp->GetRegisterContext(); +} + +VariableList *BorrowedStackFrame::GetVariableList(bool get_file_globals, + Status *error_ptr) { + return m_borrowed_frame_sp->GetVariableList(get_file_globals, error_ptr); +} + +VariableListSP +BorrowedStackFrame::GetInScopeVariableList(bool get_file_globals, + bool must_have_valid_location) { + return 
m_borrowed_frame_sp->GetInScopeVariableList(get_file_globals, + must_have_valid_location); +} + +ValueObjectSP BorrowedStackFrame::GetValueForVariableExpressionPath( + llvm::StringRef var_expr, DynamicValueType use_dynamic, uint32_t options, + VariableSP &var_sp, Status &error) { + return m_borrowed_frame_sp->GetValueForVariableExpressionPath( + var_expr, use_dynamic, options, var_sp, error); +} + +bool BorrowedStackFrame::HasDebugInformation() { + return m_borrowed_frame_sp->HasDebugInformation(); +} + +const char *BorrowedStackFrame::Disassemble() { + return m_borrowed_frame_sp->Disassemble(); +} + +ValueObjectSP BorrowedStackFrame::GetValueObjectForFrameVariable( + const VariableSP &variable_sp, DynamicValueType use_dynamic) { + return m_borrowed_frame_sp->GetValueObjectForFrameVariable(variable_sp, + use_dynamic); +} + +bool BorrowedStackFrame::IsInlined() { + return m_borrowed_frame_sp->IsInlined(); +} + +bool BorrowedStackFrame::IsSynthetic() const { + return m_borrowed_frame_sp->IsSynthetic(); +} + +bool BorrowedStackFrame::IsHistorical() const { + return m_borrowed_frame_sp->IsHistorical(); +} + +bool BorrowedStackFrame::IsArtificial() const { + return m_borrowed_frame_sp->IsArtificial(); +} + +bool BorrowedStackFrame::IsHidden() { return m_borrowed_frame_sp->IsHidden(); } + +const char *BorrowedStackFrame::GetFunctionName() { + return m_borrowed_frame_sp->GetFunctionName(); +} + +const char *BorrowedStackFrame::GetDisplayFunctionName() { + return m_borrowed_frame_sp->GetDisplayFunctionName(); +} + +ValueObjectSP BorrowedStackFrame::FindVariable(ConstString name) { + return m_borrowed_frame_sp->FindVariable(name); +} + +SourceLanguage BorrowedStackFrame::GetLanguage() { + return m_borrowed_frame_sp->GetLanguage(); +} + +SourceLanguage BorrowedStackFrame::GuessLanguage() { + return m_borrowed_frame_sp->GuessLanguage(); +} + +ValueObjectSP BorrowedStackFrame::GuessValueForAddress(addr_t addr) { + return m_borrowed_frame_sp->GuessValueForAddress(addr); +} + 
+ValueObjectSP +BorrowedStackFrame::GuessValueForRegisterAndOffset(ConstString reg, + int64_t offset) { + return m_borrowed_frame_sp->GuessValueForRegisterAndOffset(reg, offset); +} + +StructuredData::ObjectSP BorrowedStackFrame::GetLanguageSpecificData() { + return m_borrowed_frame_sp->GetLanguageSpecificData(); +} + +RecognizedStackFrameSP BorrowedStackFrame::GetRecognizedFrame() { + return m_borrowed_frame_sp->GetRecognizedFrame(); +} + +StackFrameSP BorrowedStackFrame::GetBorrowedFrame() const { + return m_borrowed_frame_sp; +} + +bool BorrowedStackFrame::isA(const void *ClassID) const { + return ClassID == &ID || StackFrame::isA(ClassID); +} + +bool BorrowedStackFrame::classof(const StackFrame *obj) { + return obj->isA(&ID); +} diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt index cff59049cdce5..df2ee03860ac0 100644 --- a/lldb/source/Target/CMakeLists.txt +++ b/lldb/source/Target/CMakeLists.txt @@ -41,6 +41,7 @@ add_lldb_library(lldbTarget SyntheticFrameProvider.cpp SectionLoadHistory.cpp SectionLoadList.cpp + BorrowedStackFrame.cpp StackFrame.cpp StackFrameList.cpp StackFrameRecognizer.cpp diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp index 78f67d21d6600..ca3d4a1a29b59 100644 --- a/lldb/source/Target/StackFrame.cpp +++ b/lldb/source/Target/StackFrame.cpp @@ -45,6 +45,9 @@ using namespace lldb; using namespace lldb_private; +// LLVM RTTI support. +char StackFrame::ID; + // The first bits in the flags are reserved for the SymbolContext::Scope bits // so we know if we have tried to look up information in our internal symbol // context (m_sc) already. 
diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index 8412e33aaba32..5d1a8a8370414 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -20,6 +20,7 @@ #include "lldb/Target/StackFrame.h" #include "lldb/Target/StackFrameRecognizer.h" #include "lldb/Target/StopInfo.h" +#include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Target/Target.h" #include "lldb/Target/Thread.h" #include "lldb/Target/Unwind.h" @@ -55,6 +56,44 @@ StackFrameList::~StackFrameList() { Clear(); } +SyntheticStackFrameList::SyntheticStackFrameList( + Thread &thread, lldb::StackFrameListSP input_frames, + const lldb::StackFrameListSP &prev_frames_sp, bool show_inline_frames) + : StackFrameList(thread, prev_frames_sp, show_inline_frames), + m_input_frames(std::move(input_frames)) {} + +bool SyntheticStackFrameList::FetchFramesUpTo( + uint32_t end_idx, InterruptionControl allow_interrupt) { + // Check if the thread has a synthetic frame provider. + if (auto provider_sp = m_thread.GetFrameProvider()) { + // Use the synthetic frame provider to generate frames lazily. + // Keep fetching until we reach end_idx or the provider returns an error. + for (uint32_t idx = m_frames.size(); idx <= end_idx; idx++) { + if (allow_interrupt && + m_thread.GetProcess()->GetTarget().GetDebugger().InterruptRequested()) + return true; + auto frame_or_err = provider_sp->GetFrameAtIndex(idx); + if (!frame_or_err) { + // Provider returned error - we've reached the end. + LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), frame_or_err.takeError(), + "Frame provider reached end at index {0}: {1}", idx); + SetAllFramesFetched(); + break; + } + StackFrameSP frame_sp = *frame_or_err; + // Set the frame list weak pointer so ExecutionContextRef can resolve + // the frame without calling Thread::GetStackFrameList(). + frame_sp->m_frame_list_wp = shared_from_this(); + m_frames.push_back(frame_sp); + } + + return false; // Not interrupted. 
+ } + + // If no provider, fall back to the base implementation. + return StackFrameList::FetchFramesUpTo(end_idx, allow_interrupt); +} + void StackFrameList::CalculateCurrentInlinedDepth() { uint32_t cur_inlined_depth = GetCurrentInlinedDepth(); if (cur_inlined_depth == UINT32_MAX) { diff --git a/lldb/source/Target/SyntheticFrameProvider.cpp b/lldb/source/Target/SyntheticFrameProvider.cpp index 241ce82c39be3..97ff42d1ed53e 100644 --- a/lldb/source/Target/SyntheticFrameProvider.cpp +++ b/lldb/source/Target/SyntheticFrameProvider.cpp @@ -8,10 +8,12 @@ #include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Core/PluginManager.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" #include "lldb/Target/Thread.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Status.h" +#include "lldb/Utility/Stream.h" using namespace lldb; using namespace lldb_private; @@ -21,12 +23,17 @@ SyntheticFrameProvider::SyntheticFrameProvider(StackFrameListSP input_frames) SyntheticFrameProvider::~SyntheticFrameProvider() = default; -void SyntheticFrameProviderDescriptor::Dump(Stream *s) const { +void ScriptedFrameProviderDescriptor::Dump(Stream *s) const { if (!s) return; + s->Format(" ID: {0:x}\n", GetID()); s->Printf(" Name: %s\n", GetName().str().c_str()); + std::string description = GetDescription(); + if (!description.empty()) + s->Printf(" Description: %s\n", description.c_str()); + // Show thread filter information. if (thread_specs.empty()) { s->PutCString(" Thread Filter: (applies to all threads)\n"); @@ -41,9 +48,23 @@ void SyntheticFrameProviderDescriptor::Dump(Stream *s) const { } } +uint32_t ScriptedFrameProviderDescriptor::GetID() const { + if (!scripted_metadata_sp) + return 0; + + return scripted_metadata_sp->GetID(); +} + +std::string ScriptedFrameProviderDescriptor::GetDescription() const { + // If we have an interface, call get_description() to fetch it. 
+ if (interface_sp && scripted_metadata_sp) + return interface_sp->GetDescription(scripted_metadata_sp->GetClassName()); + return {}; +} + llvm::Expected SyntheticFrameProvider::CreateInstance( StackFrameListSP input_frames, - const SyntheticFrameProviderDescriptor &descriptor) { + const ScriptedFrameProviderDescriptor &descriptor) { if (!input_frames) return llvm::createStringError( "cannot create synthetic frame provider: invalid input frames"); diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 3a936b85f6339..b6a662ad3f14d 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -3720,6 +3720,61 @@ Status Target::Attach(ProcessAttachInfo &attach_info, Stream *stream) { return error; } +llvm::Expected Target::AddScriptedFrameProviderDescriptor( + const ScriptedFrameProviderDescriptor &descriptor) { + if (!descriptor.IsValid()) + return llvm::createStringError("invalid frame provider descriptor"); + + llvm::StringRef name = descriptor.GetName(); + if (name.empty()) + return llvm::createStringError( + "frame provider descriptor has no class name"); + + std::lock_guard guard( + m_frame_provider_descriptors_mutex); + + uint32_t descriptor_id = descriptor.GetID(); + m_frame_provider_descriptors[descriptor_id] = descriptor; + + // Clear frame providers on existing threads so they reload with new config. 
+ if (ProcessSP process_sp = GetProcessSP()) + for (ThreadSP thread_sp : process_sp->Threads()) + thread_sp->ClearScriptedFrameProvider(); + + return descriptor_id; +} + +bool Target::RemoveScriptedFrameProviderDescriptor(uint32_t id) { + std::lock_guard guard( + m_frame_provider_descriptors_mutex); + bool removed = m_frame_provider_descriptors.erase(id); + + if (removed) + if (ProcessSP process_sp = GetProcessSP()) + for (ThreadSP thread_sp : process_sp->Threads()) + thread_sp->ClearScriptedFrameProvider(); + + return removed; +} + +void Target::ClearScriptedFrameProviderDescriptors() { + std::lock_guard guard( + m_frame_provider_descriptors_mutex); + + m_frame_provider_descriptors.clear(); + + if (ProcessSP process_sp = GetProcessSP()) + for (ThreadSP thread_sp : process_sp->Threads()) + thread_sp->ClearScriptedFrameProvider(); +} + +const llvm::DenseMap & +Target::GetScriptedFrameProviderDescriptors() const { + std::lock_guard guard( + m_frame_provider_descriptors_mutex); + return m_frame_provider_descriptors; +} + void Target::FinalizeFileActions(ProcessLaunchInfo &info) { Log *log = GetLog(LLDBLog::Process); diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 8c3e19725f8cb..b40e753aca1e9 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -13,9 +13,12 @@ #include "lldb/Core/Module.h" #include "lldb/Core/StructuredDataImpl.h" #include "lldb/Host/Host.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" #include "lldb/Interpreter/OptionValueFileSpecList.h" #include "lldb/Interpreter/OptionValueProperties.h" #include "lldb/Interpreter/Property.h" +#include "lldb/Interpreter/ScriptInterpreter.h" #include "lldb/Symbol/Function.h" #include "lldb/Target/ABI.h" #include "lldb/Target/DynamicLoader.h" @@ -26,6 +29,7 @@ #include "lldb/Target/ScriptedThreadPlan.h" #include "lldb/Target/StackFrameRecognizer.h" #include 
"lldb/Target/StopInfo.h" +#include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Target/SystemRuntime.h" #include "lldb/Target/Target.h" #include "lldb/Target/ThreadPlan.h" @@ -45,6 +49,7 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/RegularExpression.h" +#include "lldb/Utility/ScriptedMetadata.h" #include "lldb/Utility/State.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StreamString.h" @@ -257,6 +262,7 @@ void Thread::DestroyThread() { std::lock_guard guard(m_frame_mutex); m_curr_frames_sp.reset(); m_prev_frames_sp.reset(); + m_frame_provider_sp.reset(); m_prev_framezero_pc.reset(); } @@ -1439,13 +1445,76 @@ void Thread::CalculateExecutionContext(ExecutionContext &exe_ctx) { StackFrameListSP Thread::GetStackFrameList() { std::lock_guard guard(m_frame_mutex); - if (!m_curr_frames_sp) + if (m_curr_frames_sp) + return m_curr_frames_sp; + + // First, try to load a frame provider if we don't have one yet. + if (!m_frame_provider_sp) { + ProcessSP process_sp = GetProcess(); + if (process_sp) { + Target &target = process_sp->GetTarget(); + const auto &descriptors = target.GetScriptedFrameProviderDescriptors(); + + // Find first descriptor that applies to this thread. + for (const auto &entry : descriptors) { + const ScriptedFrameProviderDescriptor &descriptor = entry.second; + if (descriptor.IsValid() && descriptor.AppliesToThread(*this)) { + if (llvm::Error error = LoadScriptedFrameProvider(descriptor)) { + LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), std::move(error), + "Failed to load scripted frame provider: {0}"); + } + break; // Use first matching descriptor (success or failure). + } + } + } + } + + // Create the frame list based on whether we have a provider. + if (m_frame_provider_sp) { + // We have a provider - create synthetic frame list. 
+ StackFrameListSP input_frames = m_frame_provider_sp->GetInputFrames(); + m_curr_frames_sp = std::make_shared( + *this, input_frames, m_prev_frames_sp, true); + } else { + // No provider - use normal unwinder frames. m_curr_frames_sp = std::make_shared(*this, m_prev_frames_sp, true); + } return m_curr_frames_sp; } +llvm::Error Thread::LoadScriptedFrameProvider( + const ScriptedFrameProviderDescriptor &descriptor) { + std::lock_guard guard(m_frame_mutex); + + // Note: We don't create input_frames here - it will be created lazily + // by SyntheticStackFrameList when frames are first fetched. + // Creating them too early can cause crashes during thread initialization. + + // Create a temporary StackFrameList just to get the thread reference for the + // provider. The provider won't actually use this - it will get real input + // frames from SyntheticStackFrameList later. + StackFrameListSP temp_frames = + std::make_shared(*this, m_prev_frames_sp, true); + + auto provider_or_err = + SyntheticFrameProvider::CreateInstance(temp_frames, descriptor); + if (!provider_or_err) + return provider_or_err.takeError(); + + ClearScriptedFrameProvider(); + m_frame_provider_sp = *provider_or_err; + return llvm::Error::success(); +} + +void Thread::ClearScriptedFrameProvider() { + std::lock_guard guard(m_frame_mutex); + m_frame_provider_sp.reset(); + m_curr_frames_sp.reset(); + m_prev_frames_sp.reset(); +} + std::optional Thread::GetPreviousFrameZeroPC() { return m_prev_framezero_pc; } @@ -1466,6 +1535,7 @@ void Thread::ClearStackFrames() { m_prev_frames_sp.swap(m_curr_frames_sp); m_curr_frames_sp.reset(); + m_frame_provider_sp.reset(); m_extended_info.reset(); m_extended_info_fetched = false; } diff --git a/lldb/source/Target/ThreadSpec.cpp b/lldb/source/Target/ThreadSpec.cpp index ba4c3aa894553..624f64e3af800 100644 --- a/lldb/source/Target/ThreadSpec.cpp +++ b/lldb/source/Target/ThreadSpec.cpp @@ -19,6 +19,10 @@ const char *ThreadSpec::g_option_names[static_cast( 
ThreadSpec::ThreadSpec() : m_name(), m_queue_name() {} +ThreadSpec::ThreadSpec(Thread &thread) + : m_index(thread.GetIndexID()), m_tid(thread.GetID()), + m_name(thread.GetName()), m_queue_name(thread.GetQueueName()) {} + std::unique_ptr ThreadSpec::CreateFromStructuredData( const StructuredData::Dictionary &spec_dict, Status &error) { uint32_t index = UINT32_MAX; diff --git a/lldb/source/ValueObject/ValueObjectSynthetic.cpp b/lldb/source/ValueObject/ValueObjectSynthetic.cpp index f673c51a88412..44e53bd5fd82e 100644 --- a/lldb/source/ValueObject/ValueObjectSynthetic.cpp +++ b/lldb/source/ValueObject/ValueObjectSynthetic.cpp @@ -443,3 +443,18 @@ void ValueObjectSynthetic::SetLanguageFlags(uint64_t flags) { else this->ValueObject::SetLanguageFlags(flags); } + +void ValueObjectSynthetic::GetExpressionPath(Stream &stream, + GetExpressionPathFormat epformat) { + // A synthetic ValueObject may wrap an underlying Register or RegisterSet + // ValueObject, which requires a different approach to generating the + // expression path. In such cases, delegate to the non-synthetic value object. 
+ if (const lldb::ValueType obj_value_type = GetValueType(); + IsSynthetic() && (obj_value_type == lldb::eValueTypeRegister || + obj_value_type == lldb::eValueTypeRegisterSet)) { + + if (const lldb::ValueObjectSP raw_value = GetNonSyntheticValue()) + return raw_value->GetExpressionPath(stream, epformat); + } + return ValueObject::GetExpressionPath(stream, epformat); +} diff --git a/lldb/test/API/functionalities/scripted_frame_provider/Makefile b/lldb/test/API/functionalities/scripted_frame_provider/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/functionalities/scripted_frame_provider/TestScriptedFrameProvider.py b/lldb/test/API/functionalities/scripted_frame_provider/TestScriptedFrameProvider.py new file mode 100644 index 0000000000000..3c0390ef72fd2 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/TestScriptedFrameProvider.py @@ -0,0 +1,418 @@ +""" +Test scripted frame provider functionality. 
+""" + +import os + +import lldb +from lldbsuite.test.lldbtest import TestBase +from lldbsuite.test import lldbutil + + +class ScriptedFrameProviderTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + self.source = "main.cpp" + + def test_replace_all_frames(self): + """Test that we can replace the entire stack.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Import the test frame provider + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + # Attach the Replace provider + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.ReplaceFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify we have exactly 3 synthetic frames + self.assertEqual(thread.GetNumFrames(), 3, "Should have 3 synthetic frames") + + # Verify frame indices and PCs (dictionary-based frames don't have custom function names) + frame0 = thread.GetFrameAtIndex(0) + self.assertIsNotNone(frame0) + self.assertEqual(frame0.GetPC(), 0x1000) + + frame1 = thread.GetFrameAtIndex(1) + self.assertIsNotNone(frame1) + self.assertIn("thread_func", frame1.GetFunctionName()) + + frame2 = thread.GetFrameAtIndex(2) + self.assertIsNotNone(frame2) + self.assertEqual(frame2.GetPC(), 0x3000) + + def test_prepend_frames(self): + """Test that we can add frames before real stack.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Get original frame count and PC + original_frame_count = thread.GetNumFrames() + self.assertGreaterEqual( + original_frame_count, 
2, "Should have at least 2 real frames" + ) + + # Import and attach Prepend provider + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.PrependFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify we have 2 more frames + new_frame_count = thread.GetNumFrames() + self.assertEqual(new_frame_count, original_frame_count + 2) + + # Verify first 2 frames are synthetic (check PCs, not function names) + frame0 = thread.GetFrameAtIndex(0) + self.assertEqual(frame0.GetPC(), 0x9000) + + frame1 = thread.GetFrameAtIndex(1) + self.assertEqual(frame1.GetPC(), 0xA000) + + # Verify frame 2 is the original real frame 0 + frame2 = thread.GetFrameAtIndex(2) + self.assertIn("thread_func", frame2.GetFunctionName()) + + def test_append_frames(self): + """Test that we can add frames after real stack.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Get original frame count + original_frame_count = thread.GetNumFrames() + + # Import and attach Append provider + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.AppendFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify we have 1 more frame + new_frame_count = thread.GetNumFrames() + self.assertEqual(new_frame_count, original_frame_count + 
1) + + # Verify first frames are still real + frame0 = thread.GetFrameAtIndex(0) + self.assertIn("thread_func", frame0.GetFunctionName()) + + frame_n_plus_1 = thread.GetFrameAtIndex(new_frame_count - 1) + self.assertEqual(frame_n_plus_1.GetPC(), 0x10) + + def test_scripted_frame_objects(self): + """Test that provider can return ScriptedFrame objects.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Import the provider that returns ScriptedFrame objects + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.ScriptedFrameObjectProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify we have 5 frames + self.assertEqual( + thread.GetNumFrames(), 5, "Should have 5 custom scripted frames" + ) + + # Verify frame properties from CustomScriptedFrame + frame0 = thread.GetFrameAtIndex(0) + self.assertIsNotNone(frame0) + self.assertEqual(frame0.GetFunctionName(), "custom_scripted_frame_0") + self.assertEqual(frame0.GetPC(), 0x5000) + self.assertTrue(frame0.IsSynthetic(), "Frame should be marked as synthetic") + + frame1 = thread.GetFrameAtIndex(1) + self.assertIsNotNone(frame1) + self.assertEqual(frame1.GetPC(), 0x6000) + + frame2 = thread.GetFrameAtIndex(2) + self.assertIsNotNone(frame2) + self.assertEqual(frame2.GetFunctionName(), "custom_scripted_frame_2") + self.assertEqual(frame2.GetPC(), 0x7000) + self.assertTrue(frame2.IsSynthetic(), "Frame should be marked as synthetic") + + def test_applies_to_thread(self): + """Test that applies_to_thread filters which threads get the provider.""" + self.build() + target, process, 
thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # We should have at least 2 threads (worker threads) at the breakpoint + num_threads = process.GetNumThreads() + self.assertGreaterEqual( + num_threads, 2, "Should have at least 2 threads at breakpoint" + ) + + # Import the test frame provider + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + # Collect original thread info before applying provider + thread_info = {} + for i in range(num_threads): + t = process.GetThreadAtIndex(i) + thread_info[t.GetIndexID()] = { + "frame_count": t.GetNumFrames(), + "pc": t.GetFrameAtIndex(0).GetPC(), + } + + # Register the ThreadFilterFrameProvider which only applies to thread ID 1 + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.ThreadFilterFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Check each thread + thread_id_1_found = False + for i in range(num_threads): + t = process.GetThreadAtIndex(i) + thread_id = t.GetIndexID() + + if thread_id == 1: + # Thread with ID 1 should have synthetic frame + thread_id_1_found = True + self.assertEqual( + t.GetNumFrames(), + 1, + f"Thread with ID 1 should have 1 synthetic frame", + ) + self.assertEqual( + t.GetFrameAtIndex(0).GetPC(), + 0xFFFF, + f"Thread with ID 1 should have synthetic PC 0xFFFF", + ) + else: + # Other threads should keep their original frames + self.assertEqual( + t.GetNumFrames(), + thread_info[thread_id]["frame_count"], + f"Thread with ID {thread_id} should not be affected by provider", + ) + self.assertEqual( + t.GetFrameAtIndex(0).GetPC(), + thread_info[thread_id]["pc"], + f"Thread with ID {thread_id} should have its original PC", + ) + + # We 
should have found at least one thread with ID 1 + self.assertTrue( + thread_id_1_found, + "Should have found a thread with ID 1 to test filtering", + ) + + def test_remove_frame_provider_by_id(self): + """Test that RemoveScriptedFrameProvider removes a specific provider by ID.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Import the test frame providers + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + # Get original frame count + original_frame_count = thread.GetNumFrames() + original_pc = thread.GetFrameAtIndex(0).GetPC() + + # Register the first provider and get its ID + error = lldb.SBError() + provider_id_1 = target.RegisterScriptedFrameProvider( + "test_frame_providers.ReplaceFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider 1: {error}") + + # Verify first provider is active (3 synthetic frames) + self.assertEqual(thread.GetNumFrames(), 3, "Should have 3 synthetic frames") + self.assertEqual( + thread.GetFrameAtIndex(0).GetPC(), 0x1000, "Should have first provider's PC" + ) + + # Register a second provider and get its ID + provider_id_2 = target.RegisterScriptedFrameProvider( + "test_frame_providers.PrependFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider 2: {error}") + + # Verify IDs are different + self.assertNotEqual( + provider_id_1, provider_id_2, "Provider IDs should be unique" + ) + + # Now remove the first provider by ID + result = target.RemoveScriptedFrameProvider(provider_id_1) + self.assertSuccess( + result, f"Should successfully remove provider with ID {provider_id_1}" + ) + + # After removing the first provider, the second provider should still be active + # The PrependFrameProvider adds 2 frames 
before the real stack + # Since ReplaceFrameProvider had 3 frames, and we removed it, we should now + # have the original frames (from real stack) with PrependFrameProvider applied + new_frame_count = thread.GetNumFrames() + self.assertEqual( + new_frame_count, + original_frame_count + 2, + "Should have original frames + 2 prepended frames", + ) + + # First two frames should be from PrependFrameProvider + self.assertEqual( + thread.GetFrameAtIndex(0).GetPC(), + 0x9000, + "First frame should be from PrependFrameProvider", + ) + self.assertEqual( + thread.GetFrameAtIndex(1).GetPC(), + 0xA000, + "Second frame should be from PrependFrameProvider", + ) + + # Remove the second provider + result = target.RemoveScriptedFrameProvider(provider_id_2) + self.assertSuccess( + result, f"Should successfully remove provider with ID {provider_id_2}" + ) + + # After removing both providers, frames should be back to original + self.assertEqual( + thread.GetNumFrames(), + original_frame_count, + "Should restore original frame count", + ) + self.assertEqual( + thread.GetFrameAtIndex(0).GetPC(), + original_pc, + "Should restore original PC", + ) + + # Try to remove a provider that doesn't exist + result = target.RemoveScriptedFrameProvider(999999) + self.assertTrue(result.Fail(), "Should fail to remove non-existent provider") + + def test_circular_dependency_fix(self): + """Test that accessing input_frames in __init__ doesn't cause circular dependency. + + This test verifies the fix for the circular dependency issue where: + 1. Thread::GetStackFrameList() creates the frame provider + 2. Provider's __init__ accesses input_frames and calls methods on frames + 3. SBFrame methods trigger ExecutionContextRef::GetFrameSP() + 4. Before the fix: GetFrameSP() would call Thread::GetStackFrameList() again -> circular dependency! + 5. 
After the fix: GetFrameSP() uses the remembered frame list -> no circular dependency + + The fix works by: + - StackFrame stores m_frame_list_wp (weak pointer to originating list) + - ExecutionContextRef stores m_frame_list_wp when created from a frame + - ExecutionContextRef::GetFrameSP() tries the remembered list first before asking the thread + """ + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Get original frame count and PC + original_frame_count = thread.GetNumFrames() + original_pc = thread.GetFrameAtIndex(0).GetPC() + self.assertGreaterEqual( + original_frame_count, 2, "Should have at least 2 real frames" + ) + + # Import the provider that accesses input frames in __init__ + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + # Register the CircularDependencyTestProvider + # Before the fix, this would crash or hang due to circular dependency + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.CircularDependencyTestProvider", + lldb.SBStructuredData(), + error, + ) + + # If we get here without crashing, the fix is working! 
+ self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify the provider worked correctly + # Should have 1 synthetic frame + all original frames + new_frame_count = thread.GetNumFrames() + self.assertEqual( + new_frame_count, + original_frame_count + 1, + "Should have original frames + 1 synthetic frame", + ) + + # First frame should be synthetic + frame0 = thread.GetFrameAtIndex(0) + self.assertIsNotNone(frame0) + self.assertEqual( + frame0.GetPC(), + 0xDEADBEEF, + "First frame should be synthetic frame with PC 0xDEADBEEF", + ) + + # Second frame should be the original first frame + frame1 = thread.GetFrameAtIndex(1) + self.assertIsNotNone(frame1) + self.assertEqual( + frame1.GetPC(), + original_pc, + "Second frame should be original first frame", + ) + + # Verify we can still call methods on frames (no circular dependency!) + for i in range(min(3, new_frame_count)): + frame = thread.GetFrameAtIndex(i) + self.assertIsNotNone(frame) + # These calls should not trigger circular dependency + pc = frame.GetPC() + self.assertNotEqual(pc, 0, f"Frame {i} should have valid PC") diff --git a/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/Makefile b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/TestFrameProviderCircularDependency.py b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/TestFrameProviderCircularDependency.py new file mode 100644 index 0000000000000..e03583a99425b --- /dev/null +++ 
b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/TestFrameProviderCircularDependency.py @@ -0,0 +1,117 @@ +""" +Test that frame providers wouldn't cause a hang due to a circular dependency +during its initialization. +""" + +import os +import lldb +from lldbsuite.test.lldbtest import TestBase +from lldbsuite.test import lldbutil + + +class FrameProviderCircularDependencyTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + self.source = "main.c" + + def test_circular_dependency_with_function_replacement(self): + """ + Test the circular dependency fix with a provider that replaces function names. + """ + self.build() + + target = self.dbg.CreateTarget(self.getBuildArtifact("a.out")) + self.assertTrue(target, "Target should be valid") + + bkpt = target.BreakpointCreateBySourceRegex( + "break here", lldb.SBFileSpec(self.source) + ) + self.assertTrue(bkpt.IsValid(), "Breakpoint should be valid") + self.assertEqual(bkpt.GetNumLocations(), 1, "Should have 1 breakpoint location") + + process = target.LaunchSimple(None, None, self.get_process_working_directory()) + self.assertTrue(process, "Process should be valid") + self.assertEqual( + process.GetState(), lldb.eStateStopped, "Process should be stopped" + ) + + thread = process.GetSelectedThread() + self.assertTrue(thread.IsValid(), "Thread should be valid") + + frame0 = thread.GetFrameAtIndex(0) + self.assertIn("bar", frame0.GetFunctionName(), "Should be stopped in bar()") + + original_frame_count = thread.GetNumFrames() + self.assertGreaterEqual( + original_frame_count, 3, "Should have at least 3 frames: bar, foo, main" + ) + + frame_names = [thread.GetFrameAtIndex(i).GetFunctionName() for i in range(3)] + self.assertEqual(frame_names[0], "bar", "Frame 0 should be bar") + self.assertEqual(frame_names[1], "foo", "Frame 1 should be foo") + self.assertEqual(frame_names[2], "main", "Frame 2 should be main") + + script_path = 
os.path.join(self.getSourceDir(), "frame_provider.py") + self.runCmd("command script import " + script_path) + + # Register the frame provider that accesses input_frames. + # Before the fix, this registration would trigger the circular dependency: + # - Thread::GetStackFrameList() creates provider + # - Provider's get_frame_at_index() accesses input_frames[0] + # - Calls frame.GetFunctionName() -> ExecutionContextRef::GetFrameSP() + # - Before fix: Calls Thread::GetStackFrameList() again -> CIRCULAR! + # - After fix: Uses remembered m_frame_list_wp -> Works! + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "frame_provider.ScriptedFrameObjectProvider", + lldb.SBStructuredData(), + error, + ) + + # If we reach here without crashing/hanging, the fix is working! + self.assertTrue( + error.Success(), + f"Should successfully register provider (if this fails, circular dependency!): {error}", + ) + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify the provider is working correctly. + # Frame count should be unchanged (we're replacing frames, not adding). + new_frame_count = thread.GetNumFrames() + self.assertEqual( + new_frame_count, + original_frame_count, + "Frame count should be unchanged (replacement, not addition)", + ) + + # Verify that "bar" was replaced with "baz". + frame0_new = thread.GetFrameAtIndex(0) + self.assertIsNotNone(frame0_new, "Frame 0 should exist") + self.assertEqual( + frame0_new.GetFunctionName(), + "baz", + "Frame 0 function should be replaced: bar -> baz", + ) + + # Verify other frames are unchanged. + frame1_new = thread.GetFrameAtIndex(1) + self.assertEqual( + frame1_new.GetFunctionName(), "foo", "Frame 1 should still be foo" + ) + + frame2_new = thread.GetFrameAtIndex(2) + self.assertEqual( + frame2_new.GetFunctionName(), "main", "Frame 2 should still be main" + ) + + # Verify we can call methods on all frames (no circular dependency!). 
+ for i in range(new_frame_count): + frame = thread.GetFrameAtIndex(i) + self.assertIsNotNone(frame, f"Frame {i} should exist") + # These calls should not trigger circular dependency. + pc = frame.GetPC() + self.assertNotEqual(pc, 0, f"Frame {i} should have valid PC") + func_name = frame.GetFunctionName() + self.assertIsNotNone(func_name, f"Frame {i} should have function name") diff --git a/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/frame_provider.py b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/frame_provider.py new file mode 100644 index 0000000000000..f27f18cd07b7f --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/frame_provider.py @@ -0,0 +1,102 @@ +""" +Frame provider that reproduces the circular dependency issue. + +This provider accesses input_frames and calls methods on them, +which before the fix would cause a circular dependency. +""" + +import lldb +from lldb.plugins.scripted_process import ScriptedFrame +from lldb.plugins.scripted_frame_provider import ScriptedFrameProvider + + +class CustomScriptedFrame(ScriptedFrame): + """Custom scripted frame with full control over frame behavior.""" + + def __init__(self, thread, idx, pc, function_name): + args = lldb.SBStructuredData() + super().__init__(thread, args) + + self.idx = idx + self.pc = pc + self.function_name = function_name + + def get_id(self): + """Return the frame index.""" + return self.idx + + def get_pc(self): + """Return the program counter.""" + return self.pc + + def get_function_name(self): + """Return the function name.""" + return self.function_name + + def is_artificial(self): + """Mark as artificial frame.""" + return False + + def is_hidden(self): + """Not hidden.""" + return False + + def get_register_context(self): + return None + + +class ScriptedFrameObjectProvider(ScriptedFrameProvider): + """ + Provider that returns ScriptedFrame objects and accesses input_frames. 
+ + This provider demonstrates the circular dependency bug fix: + 1. During get_frame_at_index(), we access input_frames[idx] + 2. We call frame.GetFunctionName() and frame.GetPC() on input frames + 3. Before the fix: These calls would trigger ExecutionContextRef::GetFrameSP() + which would call Thread::GetStackFrameList() -> circular dependency! + 4. After the fix: ExecutionContextRef uses the remembered frame list -> no circular dependency + """ + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + self.replacement_count = 0 + if self.target.process: + baz_symbol_ctx = self.target.FindFunctions("baz") + self.baz_symbol_ctx = None + if len(baz_symbol_ctx) == 1: + self.baz_symbol_ctx = baz_symbol_ctx[0] + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Provider that replaces 'bar' function with 'baz'" + + def get_frame_at_index(self, idx): + """ + Replace frames named 'bar' with custom frames named 'baz'. + + This accesses input_frames and calls methods on them, which would + trigger the circular dependency bug before the fix. + """ + if idx < len(self.input_frames): + # This access and method calls would cause circular dependency before fix! + frame = self.input_frames[idx] + + # Calling GetFunctionName() triggers ExecutionContextRef resolution. + function_name = frame.GetFunctionName() + + if function_name == "bar" and self.baz_symbol_ctx: + # Replace "bar" with "baz". + baz_func = self.baz_symbol_ctx.GetFunction() + new_function_name = baz_func.GetName() + pc = baz_func.GetStartAddress().GetLoadAddress(self.target) + custom_frame = CustomScriptedFrame( + self.thread, idx, pc, new_function_name + ) + self.replacement_count += 1 + return custom_frame + + # Pass through other frames by returning their index. 
+ return idx + + return None diff --git a/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/main.c b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/main.c new file mode 100644 index 0000000000000..bbd1028236f40 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/main.c @@ -0,0 +1,21 @@ +#include + +int baz() { + printf("baz\n"); + return 666; +} + +int bar() { + printf("bar\n"); + return 42; // break here. +} + +int foo() { + printf("foo\n"); + return bar(); +} + +int main() { + printf("main\n"); + return foo(); +} diff --git a/lldb/test/API/functionalities/scripted_frame_provider/main.cpp b/lldb/test/API/functionalities/scripted_frame_provider/main.cpp new file mode 100644 index 0000000000000..0298e88e4de16 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/main.cpp @@ -0,0 +1,53 @@ +// Multi-threaded test program for testing frame providers. + +#include +#include +#include +#include + +std::mutex mtx; +std::condition_variable cv; +int ready_count = 0; +constexpr int NUM_THREADS = 2; + +void thread_func(int thread_num) { + std::cout << "Thread " << thread_num << " started\n"; + + { + std::unique_lock lock(mtx); + ready_count++; + if (ready_count == NUM_THREADS + 1) { + cv.notify_all(); + } else { + cv.wait(lock, [] { return ready_count == NUM_THREADS + 1; }); + } + } + + std::cout << "Thread " << thread_num << " at breakpoint\n"; // Break here. 
+} + +int main(int argc, char **argv) { + std::thread threads[NUM_THREADS]; + + for (int i = 0; i < NUM_THREADS; i++) { + threads[i] = std::thread(thread_func, i); + } + + { + std::unique_lock lock(mtx); + ready_count++; + if (ready_count == NUM_THREADS + 1) { + cv.notify_all(); + } else { + cv.wait(lock, [] { return ready_count == NUM_THREADS + 1; }); + } + } + + std::cout << "Main thread at barrier\n"; + + for (int i = 0; i < NUM_THREADS; i++) + threads[i].join(); + + std::cout << "All threads completed\n"; + return 0; +} diff --git a/lldb/test/API/functionalities/scripted_frame_provider/test_frame_providers.py b/lldb/test/API/functionalities/scripted_frame_provider/test_frame_providers.py new file mode 100644 index 0000000000000..b9731fdc0a197 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/test_frame_providers.py @@ -0,0 +1,222 @@ +""" +Test frame providers for scripted frame provider functionality. + +These providers demonstrate various merge strategies: +- Replace: Replace entire stack +- Prepend: Add frames before real stack +- Append: Add frames after real stack + +It also shows the ability to mix a dictionary, a ScriptedFrame or an SBFrame +index to create stackframes +""" + +import lldb +from lldb.plugins.scripted_process import ScriptedFrame +from lldb.plugins.scripted_frame_provider import ScriptedFrameProvider + + +class ReplaceFrameProvider(ScriptedFrameProvider): + """Replace entire stack with custom frames.""" + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + self.frames = [ + { + "idx": 0, + "pc": 0x1000, + }, + 0, + { + "idx": 2, + "pc": 0x3000, + }, + ] + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Replace entire stack with 3 custom frames" + + def get_frame_at_index(self, index): + if index >= len(self.frames): + return None + return self.frames[index] + + +class PrependFrameProvider(ScriptedFrameProvider): + """Prepend synthetic 
frames before real stack.""" + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Prepend 2 synthetic frames before real stack" + + def get_frame_at_index(self, index): + if index == 0: + return {"pc": 0x9000} + elif index == 1: + return {"pc": 0xA000} + elif index - 2 < len(self.input_frames): + return index - 2 # Return real frame index. + return None + + +class AppendFrameProvider(ScriptedFrameProvider): + """Append synthetic frames after real stack.""" + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Append 1 synthetic frame after real stack" + + def get_frame_at_index(self, index): + if index < len(self.input_frames): + return index # Return real frame index. + elif index == len(self.input_frames): + return { + "idx": 1, + "pc": 0x10, + } + return None + + +class CustomScriptedFrame(ScriptedFrame): + """Custom scripted frame with full control over frame behavior.""" + + def __init__(self, thread, idx, pc, function_name): + args = lldb.SBStructuredData() + super().__init__(thread, args) + + self.idx = idx + self.pc = pc + self.function_name = function_name + + def get_id(self): + """Return the frame index.""" + return self.idx + + def get_pc(self): + """Return the program counter.""" + return self.pc + + def get_function_name(self): + """Return the function name.""" + return self.function_name + + def is_artificial(self): + """Mark as artificial frame.""" + return False + + def is_hidden(self): + """Not hidden.""" + return False + + def get_register_context(self): + """No register context for this test.""" + return None + + +class ScriptedFrameObjectProvider(ScriptedFrameProvider): + """Provider that returns ScriptedFrame objects instead of dictionaries.""" + + def __init__(self, input_frames, args): + 
super().__init__(input_frames, args) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Provider returning custom ScriptedFrame objects" + + def get_frame_at_index(self, index): + """Return ScriptedFrame objects or dictionaries based on index.""" + if index == 0: + return CustomScriptedFrame( + self.thread, 0, 0x5000, "custom_scripted_frame_0" + ) + elif index == 1: + return {"pc": 0x6000} + elif index == 2: + return CustomScriptedFrame( + self.thread, 2, 0x7000, "custom_scripted_frame_2" + ) + elif index == 3: + return len(self.input_frames) - 2 # Real frame index. + elif index == 4: + return len(self.input_frames) - 1 # Real frame index. + return None + + +class ThreadFilterFrameProvider(ScriptedFrameProvider): + """Provider that only applies to thread with ID 1.""" + + @staticmethod + def applies_to_thread(thread): + """Only apply to thread with index ID 1.""" + return thread.GetIndexID() == 1 + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Provider that only applies to thread ID 1" + + def get_frame_at_index(self, index): + """Return a single synthetic frame.""" + if index == 0: + return {"pc": 0xFFFF} + return None + + +class CircularDependencyTestProvider(ScriptedFrameProvider): + """ + Provider that tests the circular dependency fix. + + This provider accesses input_frames during __init__ and calls methods + on those frames. Before the fix, this would cause a circular dependency: + - Thread::GetStackFrameList() creates provider + - Provider's __init__ accesses input_frames[0] + - SBFrame::GetPC() tries to resolve ExecutionContextRef + - ExecutionContextRef::GetFrameSP() calls Thread::GetStackFrameList() + - Re-enters initialization -> circular dependency! + + With the fix, ExecutionContextRef remembers the frame list, so it doesn't + re-enter Thread::GetStackFrameList(). 
+ """ + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + + # This would cause circular dependency before the fix! + # Accessing frames and calling methods on them during __init__ + self.original_frame_count = len(input_frames) + self.original_pcs = [] + + # Call GetPC() on each input frame - this triggers ExecutionContextRef resolution. + for i in range(min(3, len(input_frames))): + frame = input_frames[i] + if frame.IsValid(): + pc = frame.GetPC() + self.original_pcs.append(pc) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Provider that tests circular dependency fix by accessing frames in __init__" + + def get_frame_at_index(self, index): + """Prepend a synthetic frame, then pass through original frames.""" + if index == 0: + # Synthetic frame at index 0. + return {"pc": 0xDEADBEEF} + elif index - 1 < self.original_frame_count: + # Pass through original frames at indices 1, 2, 3, ... + return index - 1 + return None diff --git a/lldb/test/API/python_api/exprpath_register/Makefile b/lldb/test/API/python_api/exprpath_register/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/python_api/exprpath_register/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/python_api/exprpath_register/TestExprPathRegisters.py b/lldb/test/API/python_api/exprpath_register/TestExprPathRegisters.py new file mode 100644 index 0000000000000..4ffbc5e49fb0d --- /dev/null +++ b/lldb/test/API/python_api/exprpath_register/TestExprPathRegisters.py @@ -0,0 +1,64 @@ +""" +Test Getting the expression path for registers works correctly +""" + +import lldb +from lldbsuite.test import lldbutil +from lldbsuite.test.lldbtest import TestBase, VALID_BREAKPOINT, VALID_TARGET + + +class TestExprPathRegisters(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def verify_register_path(self, reg_value: lldb.SBValue): + stream = 
lldb.SBStream() + reg_name = reg_value.name + self.assertTrue( + reg_value.GetExpressionPath(stream), + f"Expected an expression path for register {reg_name}.", + ) + reg_expr_path = stream.GetData() + self.assertEqual(reg_expr_path, f"${reg_name}") + + def test_float_registers(self): + """Verify the expression path of the registers is valid.""" + self.build() + _, _, thread, _ = lldbutil.run_to_name_breakpoint(self, "my_foo") + frame = thread.GetSelectedFrame() + self.assertTrue(frame, "Expected a valid Frame.") + + # possible floating point register on some cpus. + register_names = [ + "xmm0", + "ymm0", + "v0", + "v1", + "f0", + "f1", + "d0", + "d1", + "vr0", + "vr1", + "st0", + "st1", + ] + for name in register_names: + reg_value = frame.FindRegister(name) + # some the register will not be available for the cpu + # only verify if it is valid. + if reg_value: + self.verify_register_path(reg_value) + + def test_all_registers(self): + """Test all the registers that is avaiable on the machine""" + self.build() + _, _, thread, _ = lldbutil.run_to_name_breakpoint(self, "my_foo") + frame = thread.GetSelectedFrame() + self.assertTrue(frame, "Expected a valid Frame.") + + register_sets = frame.GetRegisters() + self.assertTrue(register_sets.IsValid(), "Expected Frame Registers") + + for register_set in register_sets: + for register in register_set.children: + self.verify_register_path(register) diff --git a/lldb/test/API/python_api/exprpath_register/main.c b/lldb/test/API/python_api/exprpath_register/main.c new file mode 100644 index 0000000000000..4809a87cdf210 --- /dev/null +++ b/lldb/test/API/python_api/exprpath_register/main.c @@ -0,0 +1,10 @@ + +float my_foo() { + float result = 10.0 + 20.0; + return result; +} + +int main(void) { + float result = my_foo(); + return (int)result; +} diff --git a/lldb/test/API/python_api/sbframe_extensions/Makefile b/lldb/test/API/python_api/sbframe_extensions/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- 
/dev/null +++ b/lldb/test/API/python_api/sbframe_extensions/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/python_api/sbframe_extensions/TestSBFrameExtensions.py b/lldb/test/API/python_api/sbframe_extensions/TestSBFrameExtensions.py new file mode 100644 index 0000000000000..d3eabfdd979c5 --- /dev/null +++ b/lldb/test/API/python_api/sbframe_extensions/TestSBFrameExtensions.py @@ -0,0 +1,534 @@ +""" +Test SBFrameExtensions API. +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestSBFrameExtensions(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + self.source = "main.c" + + def _get_frame(self): + """Helper method to get a valid frame for testing.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Set breakpoint here", lldb.SBFileSpec(self.source) + ) + frame = thread.GetFrameAtIndex(0) + self.assertTrue(frame.IsValid(), "Frame should be valid") + return frame, thread + + def test_property_pc(self): + """Test SBFrame extension property: pc""" + frame, _ = self._get_frame() + + pc = frame.pc + self.assertIsInstance(pc, int, "pc should be an integer") + self.assertGreater(pc, 0, "pc should be greater than 0") + self.assertEqual(pc, frame.GetPC(), "pc property should match GetPC()") + + def test_property_addr(self): + """Test SBFrame extension property: addr""" + frame, _ = self._get_frame() + + addr = frame.addr + self.assertTrue(addr.IsValid(), "addr should be valid") + self.assertEqual(addr, frame.GetPCAddress(), "addr should match GetPCAddress()") + + def test_property_fp(self): + """Test SBFrame extension property: fp""" + frame, _ = self._get_frame() + + fp = frame.fp + self.assertIsInstance(fp, int, "fp should be an integer") + self.assertEqual(fp, frame.GetFP(), "fp property should match GetFP()") + + def test_property_sp(self): + 
"""Test SBFrame extension property: sp""" + frame, _ = self._get_frame() + + sp = frame.sp + self.assertIsInstance(sp, int, "sp should be an integer") + self.assertEqual(sp, frame.GetSP(), "sp property should match GetSP()") + + def test_property_module(self): + """Test SBFrame extension property: module""" + frame, _ = self._get_frame() + + module = frame.module + self.assertTrue(module.IsValid(), "module should be valid") + self.assertEqual(module, frame.GetModule(), "module should match GetModule()") + + def test_property_compile_unit(self): + """Test SBFrame extension property: compile_unit""" + frame, _ = self._get_frame() + + compile_unit = frame.compile_unit + self.assertTrue(compile_unit.IsValid(), "compile_unit should be valid") + self.assertEqual( + compile_unit, + frame.GetCompileUnit(), + "compile_unit should match GetCompileUnit()", + ) + + def test_property_function(self): + """Test SBFrame extension property: function""" + frame, _ = self._get_frame() + + function = frame.function + self.assertTrue(function.IsValid(), "function should be valid") + self.assertEqual( + function, frame.GetFunction(), "function should match GetFunction()" + ) + + def test_property_symbol(self): + """Test SBFrame extension property: symbol""" + frame, _ = self._get_frame() + + symbol = frame.symbol + self.assertTrue(symbol.IsValid(), "symbol should be valid") + self.assertEqual(symbol, frame.GetSymbol(), "symbol should match GetSymbol()") + + def test_property_block(self): + """Test SBFrame extension property: block""" + frame, _ = self._get_frame() + + block = frame.block + self.assertTrue(block.IsValid(), "block should be valid") + block_direct = frame.GetBlock() + self.assertTrue(block_direct.IsValid(), "GetBlock() should return valid block") + # Verify both blocks are valid and have the same ranges + # by comparing their first range start address. 
+ block_ranges = block.GetRanges() + block_direct_ranges = block_direct.GetRanges() + if block_ranges.GetSize() > 0 and block_direct_ranges.GetSize() > 0: + self.assertEqual( + block.GetRangeStartAddress(0), + block_direct.GetRangeStartAddress(0), + "block should match GetBlock() start address", + ) + + def test_property_is_inlined(self): + """Test SBFrame extension property: is_inlined""" + frame, _ = self._get_frame() + + is_inlined = frame.is_inlined + self.assertIsInstance(is_inlined, bool, "is_inlined should be a boolean") + self.assertEqual( + is_inlined, frame.IsInlined(), "is_inlined should match IsInlined()" + ) + + def test_property_name(self): + """Test SBFrame extension property: name""" + frame, _ = self._get_frame() + + name = frame.name + self.assertIsInstance(name, str, "name should be a string") + self.assertEqual( + name, frame.GetFunctionName(), "name should match GetFunctionName()" + ) + # Should be one of our functions. + self.assertIn( + name, ["func1", "func2", "main"], "name should be a known function" + ) + + def test_property_line_entry(self): + """Test SBFrame extension property: line_entry""" + frame, _ = self._get_frame() + + line_entry = frame.line_entry + self.assertTrue(line_entry.IsValid(), "line_entry should be valid") + self.assertEqual( + line_entry, frame.GetLineEntry(), "line_entry should match GetLineEntry()" + ) + + def test_property_thread(self): + """Test SBFrame extension property: thread""" + frame, thread = self._get_frame() + + thread_prop = frame.thread + self.assertTrue(thread_prop.IsValid(), "thread should be valid") + self.assertEqual( + thread_prop, frame.GetThread(), "thread should match GetThread()" + ) + self.assertEqual( + thread_prop.GetThreadID(), + thread.GetThreadID(), + "thread should be the same thread", + ) + + def test_property_disassembly(self): + """Test SBFrame extension property: disassembly""" + frame, _ = self._get_frame() + + disassembly = frame.disassembly + self.assertIsInstance(disassembly, 
str, "disassembly should be a string") + self.assertGreater(len(disassembly), 0, "disassembly should not be empty") + self.assertEqual( + disassembly, frame.Disassemble(), "disassembly should match Disassemble()" + ) + + def test_property_idx(self): + """Test SBFrame extension property: idx""" + frame, _ = self._get_frame() + + idx = frame.idx + self.assertIsInstance(idx, int, "idx should be an integer") + self.assertEqual(idx, frame.GetFrameID(), "idx should match GetFrameID()") + self.assertEqual(idx, 0, "First frame should have idx 0") + + def test_property_variables(self): + """Test SBFrame extension property: variables""" + frame, _ = self._get_frame() + + variables = frame.variables + self.assertIsInstance( + variables, lldb.SBValueList, "variables should be SBValueList" + ) + all_vars = frame.GetVariables(True, True, True, True) + self.assertEqual( + variables.GetSize(), + all_vars.GetSize(), + "variables should match GetVariables(True, True, True, True)", + ) + + def test_property_vars(self): + """Test SBFrame extension property: vars (alias for variables)""" + frame, _ = self._get_frame() + + vars_prop = frame.vars + self.assertIsInstance(vars_prop, lldb.SBValueList, "vars should be SBValueList") + variables = frame.variables + self.assertEqual( + vars_prop.GetSize(), + variables.GetSize(), + "vars should match variables", + ) + + def test_property_locals(self): + """Test SBFrame extension property: locals""" + frame, _ = self._get_frame() + + locals_prop = frame.locals + self.assertIsInstance( + locals_prop, lldb.SBValueList, "locals should be SBValueList" + ) + locals_direct = frame.GetVariables(False, True, False, False) + self.assertEqual( + locals_prop.GetSize(), + locals_direct.GetSize(), + "locals should match GetVariables(False, True, False, False)", + ) + + def test_property_args(self): + """Test SBFrame extension property: args""" + frame, _ = self._get_frame() + + args_prop = frame.args + self.assertIsInstance(args_prop, lldb.SBValueList, "args 
should be SBValueList") + args_direct = frame.GetVariables(True, False, False, False) + self.assertEqual( + args_prop.GetSize(), + args_direct.GetSize(), + "args should match GetVariables(True, False, False, False)", + ) + + def test_property_arguments(self): + """Test SBFrame extension property: arguments (alias for args)""" + frame, _ = self._get_frame() + + arguments_prop = frame.arguments + self.assertIsInstance( + arguments_prop, lldb.SBValueList, "arguments should be SBValueList" + ) + args_prop = frame.args + self.assertEqual( + arguments_prop.GetSize(), + args_prop.GetSize(), + "arguments should match args", + ) + + def test_property_statics(self): + """Test SBFrame extension property: statics""" + frame, _ = self._get_frame() + + statics_prop = frame.statics + self.assertIsInstance( + statics_prop, lldb.SBValueList, "statics should be SBValueList" + ) + statics_direct = frame.GetVariables(False, False, True, False) + self.assertEqual( + statics_prop.GetSize(), + statics_direct.GetSize(), + "statics should match GetVariables(False, False, True, False)", + ) + + def test_property_registers(self): + """Test SBFrame extension property: registers""" + frame, _ = self._get_frame() + + registers = frame.registers + # registers returns an SBValueList that can be iterated. + self.assertTrue(hasattr(registers, "__iter__"), "registers should be iterable") + registers_direct = frame.GetRegisters() + # Compare by iterating and counting. 
+ registers_count = sum(1 for _ in registers) + registers_direct_count = sum(1 for _ in registers_direct) + self.assertEqual( + registers_count, + registers_direct_count, + "registers should match GetRegisters()", + ) + + def test_property_regs(self): + """Test SBFrame extension property: regs (alias for registers)""" + frame, _ = self._get_frame() + + regs = frame.regs + self.assertTrue(hasattr(regs, "__iter__"), "regs should be iterable") + registers = frame.registers + regs_count = sum(1 for _ in regs) + registers_count = sum(1 for _ in registers) + self.assertEqual(regs_count, registers_count, "regs should match registers") + + def test_property_register(self): + """Test SBFrame extension property: register (flattened view)""" + frame, _ = self._get_frame() + + register = frame.register + self.assertIsNotNone(register, "register should not be None") + # register is a helper object with __iter__ and __getitem__. + reg_names = set() + for reg in register: + self.assertTrue(reg.IsValid(), "Register should be valid") + reg_names.add(reg.name) + + # Test register indexing by name. + if len(reg_names) > 0: + first_reg_name = list(reg_names)[0] + reg_by_name = register[first_reg_name] + self.assertTrue(reg_by_name.IsValid(), "Register by name should be valid") + self.assertEqual( + reg_by_name.name, first_reg_name, "Register name should match" + ) + + def test_property_reg(self): + """Test SBFrame extension property: reg (alias for register)""" + frame, _ = self._get_frame() + + reg = frame.reg + self.assertIsNotNone(reg, "reg should not be None") + register = frame.register + reg_names = set() + for r in reg: + reg_names.add(r.name) + reg_names2 = set() + for r in register: + reg_names2.add(r.name) + self.assertEqual(reg_names, reg_names2, "reg should match register") + + def test_property_parent(self): + """Test SBFrame extension property: parent""" + frame0, thread = self._get_frame() + + # If there's a parent frame (frame 1), test parent property. 
+ if thread.GetNumFrames() > 1: + frame1 = thread.GetFrameAtIndex(1) + parent = frame0.parent + self.assertTrue(parent.IsValid(), "parent should be valid") + self.assertEqual( + parent.GetFrameID(), + frame1.GetFrameID(), + "parent should be the next frame", + ) + self.assertEqual( + parent.pc, frame1.GetPC(), "parent PC should match frame 1" + ) + + def test_property_child(self): + """Test SBFrame extension property: child""" + frame0, thread = self._get_frame() + + # Test child property (should be frame -1, which doesn't exist, so should return invalid). + child = frame0.child + # Child of frame 0 would be frame -1, which doesn't exist. + # So it should return an invalid frame. + if thread.GetNumFrames() == 1: + self.assertFalse(child.IsValid(), "child of only frame should be invalid") + + def test_method_get_all_variables(self): + """Test SBFrame extension method: get_all_variables()""" + frame, _ = self._get_frame() + + all_vars = frame.get_all_variables() + self.assertIsInstance( + all_vars, lldb.SBValueList, "get_all_variables should return SBValueList" + ) + all_vars_direct = frame.GetVariables(True, True, True, True) + self.assertEqual( + all_vars.GetSize(), + all_vars_direct.GetSize(), + "get_all_variables should match GetVariables(True, True, True, True)", + ) + + def test_method_get_arguments(self): + """Test SBFrame extension method: get_arguments()""" + frame, _ = self._get_frame() + + args = frame.get_arguments() + self.assertIsInstance( + args, lldb.SBValueList, "get_arguments should return SBValueList" + ) + args_direct = frame.GetVariables(True, False, False, False) + self.assertEqual( + args.GetSize(), + args_direct.GetSize(), + "get_arguments should match GetVariables(True, False, False, False)", + ) + + def test_method_get_locals(self): + """Test SBFrame extension method: get_locals()""" + frame, _ = self._get_frame() + + locals = frame.get_locals() + self.assertIsInstance( + locals, lldb.SBValueList, "get_locals should return SBValueList" + ) + 
locals_direct = frame.GetVariables(False, True, False, False) + self.assertEqual( + locals.GetSize(), + locals_direct.GetSize(), + "get_locals should match GetVariables(False, True, False, False)", + ) + + def test_method_get_statics(self): + """Test SBFrame extension method: get_statics()""" + frame, _ = self._get_frame() + + statics = frame.get_statics() + self.assertIsInstance( + statics, lldb.SBValueList, "get_statics should return SBValueList" + ) + statics_direct = frame.GetVariables(False, False, True, False) + self.assertEqual( + statics.GetSize(), + statics_direct.GetSize(), + "get_statics should match GetVariables(False, False, True, False)", + ) + + def test_method_var(self): + """Test SBFrame extension method: var()""" + frame, _ = self._get_frame() + + # Test var() method with a variable that should exist. + # First, let's see what variables are available. + all_vars = frame.GetVariables(True, True, True, True) + if all_vars.GetSize() > 0: + var_name = all_vars.GetValueAtIndex(0).GetName() + var_value = frame.var(var_name) + self.assertTrue(var_value.IsValid(), f"var('{var_name}') should be valid") + self.assertEqual( + var_value.GetName(), + var_name, + f"var('{var_name}') should return the correct variable", + ) + # Compare with GetValueForVariablePath. + var_direct = frame.GetValueForVariablePath(var_name) + self.assertEqual( + var_value.GetName(), + var_direct.GetName(), + "var() should match GetValueForVariablePath()", + ) + + # Test var() with non-existent variable. + invalid_var = frame.var("NonExistentVariable12345") + self.assertFalse( + invalid_var.IsValid(), "var() with non-existent variable should be invalid" + ) + + def test_method_get_parent_frame(self): + """Test SBFrame extension method: get_parent_frame()""" + frame0, thread = self._get_frame() + + # Test get_parent_frame. 
+ if thread.GetNumFrames() > 1: + parent = frame0.get_parent_frame() + self.assertTrue( + parent.IsValid(), "get_parent_frame should return valid frame" + ) + frame1 = thread.GetFrameAtIndex(1) + self.assertEqual( + parent.GetFrameID(), + frame1.GetFrameID(), + "get_parent_frame should return frame 1", + ) + else: + # If there's only one frame, parent should be invalid. + parent = frame0.get_parent_frame() + # Note: get_parent_frame might return an invalid frame if idx+1 is out of bounds. + + def test_method_get_child_frame(self): + """Test SBFrame extension method: get_child_frame()""" + frame0, thread = self._get_frame() + + # Test get_child_frame (frame -1 doesn't exist, so should be invalid). + child = frame0.get_child_frame() + if thread.GetNumFrames() == 1: + self.assertFalse( + child.IsValid(), "get_child_frame of only frame should be invalid" + ) + + def test_special_method_int(self): + """Test SBFrame extension special method: __int__""" + frame0, _ = self._get_frame() + + # Test __int__ (converts frame to its frame ID). + frame_id = int(frame0) + self.assertIsInstance(frame_id, int, "__int__ should return an integer") + self.assertEqual( + frame_id, frame0.GetFrameID(), "__int__ should return frame ID" + ) + + def test_special_method_hex(self): + """Test SBFrame extension special method: __hex__""" + frame0, _ = self._get_frame() + + # Test __hex__ (converts frame to its PC). + # Note: __hex__ returns the PC as an integer, not a hex string. + # In Python 3, hex() builtin calls __index__ if __hex__ doesn't exist, + # but since __hex__ is defined, it will be called. + pc_hex = frame0.__hex__() + self.assertIsInstance(pc_hex, int, "__hex__ should return an integer (PC)") + self.assertEqual(pc_hex, frame0.GetPC(), "__hex__ should return PC") + + def test_special_method_eq(self): + """Test SBFrame extension special method: __eq__ and __ne__""" + frame0, thread = self._get_frame() + + # Test __eq__ and __ne__. 
+ frame0_copy = thread.GetFrameAtIndex(0) + self.assertTrue(frame0 == frame0_copy, "Same frame should be equal") + self.assertFalse(frame0 != frame0_copy, "Same frame should not be not-equal") + + if thread.GetNumFrames() > 1: + frame1 = thread.GetFrameAtIndex(1) + self.assertFalse(frame0 == frame1, "Different frames should not be equal") + self.assertTrue(frame0 != frame1, "Different frames should be not-equal") + + def test_pc_property_settable(self): + """Test that pc property is settable""" + frame, _ = self._get_frame() + + original_pc = frame.GetPC() + # Test that we can set pc (though this might not work on all platforms). + # We'll just verify the property exists and can be read. + pc = frame.pc + self.assertIsInstance(pc, int, "pc should be readable") + # Note: Setting pc might not be supported on all platforms, so we just test reading. diff --git a/lldb/test/API/python_api/sbframe_extensions/main.c b/lldb/test/API/python_api/sbframe_extensions/main.c new file mode 100644 index 0000000000000..8e2d3ed8e5a5f --- /dev/null +++ b/lldb/test/API/python_api/sbframe_extensions/main.c @@ -0,0 +1,33 @@ +#include + +// Global and static variables for testing +int g_global_var = 42; +static int g_static_var = 100; + +// Function declarations +int func1(int arg1, char arg2); +int func2(int arg1, int arg2); + +int func1(int arg1, char arg2) { + static int static_var = 200; + int local1 = arg1 * 2; + char local2 = arg2; + // Set breakpoint here + return local1 + local2 + static_var; +} + +int func2(int arg1, int arg2) { + int local1 = arg1 + arg2; + int local2 = arg1 * arg2; + // Set breakpoint here + return func1(local1, 'X'); +} + +int main(int argc, char const *argv[]) { + int main_local = 10; + static int main_static = 50; + // Set breakpoint here + int result = func2(5, 7); + printf("Result: %d\n", result); + return 0; +} diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py index b0d4e272d5646..0c8c39d37e089 100644 --- 
a/lldb/test/Shell/helper/toolchain.py +++ b/lldb/test/Shell/helper/toolchain.py @@ -226,7 +226,7 @@ def use_support_substitutions(config): except OSError: res = -1 if res == 0 and out: - sdk_path = str(out) + sdk_path = out.decode("utf-8") llvm_config.lit_config.note("using SDKROOT: %r" % sdk_path) host_flags += ["-isysroot", sdk_path] elif sys.platform != "win32": diff --git a/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp index 53e1810a5b0e0..2d30e089447f1 100644 --- a/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp @@ -23,7 +23,7 @@ llvm::Expected InitializeRequestHandler::Run( const InitializeRequestArguments &arguments) const { // Store initialization arguments for later use in Launch/Attach. dap.clientFeatures = arguments.supportedFeatures; - dap.sourceInitFile = arguments.lldbExtSourceInitFile.value_or(true); + dap.sourceInitFile = arguments.lldbExtSourceInitFile; return dap.GetCapabilities(); } diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp index d53a520ade39b..0a1d580bffd68 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp @@ -216,12 +216,13 @@ bool fromJSON(const json::Value &Params, InitializeRequestArguments &IRA, } return OM.map("adapterID", IRA.adapterID) && - OM.map("clientID", IRA.clientID) && - OM.map("clientName", IRA.clientName) && OM.map("locale", IRA.locale) && - OM.map("linesStartAt1", IRA.linesStartAt1) && - OM.map("columnsStartAt1", IRA.columnsStartAt1) && + OM.mapOptional("clientID", IRA.clientID) && + OM.mapOptional("clientName", IRA.clientName) && + OM.mapOptional("locale", IRA.locale) && + OM.mapOptional("linesStartAt1", IRA.linesStartAt1) && + OM.mapOptional("columnsStartAt1", IRA.columnsStartAt1) && OM.mapOptional("pathFormat", IRA.pathFormat) && - 
OM.map("$__lldb_sourceInitFile", IRA.lldbExtSourceInitFile); + OM.mapOptional("$__lldb_sourceInitFile", IRA.lldbExtSourceInitFile); } bool fromJSON(const json::Value &Params, Configuration &C, json::Path P) { diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h index 37fc2465f6a05..6a85033ae7ef2 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h @@ -108,23 +108,23 @@ struct InitializeRequestArguments { std::string adapterID; /// The ID of the client using this adapter. - std::optional clientID; + std::string clientID; /// The human-readable name of the client using this adapter. - std::optional clientName; + std::string clientName; /// The ISO-639 locale of the client using this adapter, e.g. en-US or de-CH. - std::optional locale; + std::string locale; /// Determines in what format paths are specified. The default is `path`, /// which is the native format. PathFormat pathFormat = ePatFormatPath; /// If true all line numbers are 1-based (default). - std::optional linesStartAt1; + bool linesStartAt1 = true; /// If true all column numbers are 1-based (default). - std::optional columnsStartAt1; + bool columnsStartAt1 = true; /// The set of supported features reported by the client. llvm::DenseSet supportedFeatures; @@ -133,7 +133,7 @@ struct InitializeRequestArguments { /// @{ /// Source init files when initializing lldb::SBDebugger. 
- std::optional lldbExtSourceInitFile; + bool lldbExtSourceInitFile = true; /// @} }; diff --git a/lldb/unittests/DAP/ProtocolRequestsTest.cpp b/lldb/unittests/DAP/ProtocolRequestsTest.cpp index ba9aef1e5fcc5..a74c369924b8e 100644 --- a/lldb/unittests/DAP/ProtocolRequestsTest.cpp +++ b/lldb/unittests/DAP/ProtocolRequestsTest.cpp @@ -77,7 +77,7 @@ TEST(ProtocolRequestsTest, EvaluateArguments) { EXPECT_EQ(expected->expression, "hello world"); EXPECT_EQ(expected->context, eEvaluateContextRepl); - // Check required keys; + // Check required keys. EXPECT_THAT_EXPECTED(parse(R"({})"), FailedWithMessage("missing value at (root).expression")); } @@ -118,3 +118,67 @@ TEST(ProtocolRequestsTest, EvaluateResponseBody) { ASSERT_THAT_EXPECTED(expected_opt, llvm::Succeeded()); EXPECT_EQ(PrettyPrint(*expected_opt), PrettyPrint(body)); } + +TEST(ProtocolRequestsTest, InitializeRequestArguments) { + llvm::Expected expected = + parse(R"({"adapterID": "myid"})"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(expected->adapterID, "myid"); + + // Check optional keys. 
+ expected = parse(R"({ + "adapterID": "myid", + "clientID": "myclientid", + "clientName": "lldb-dap-unit-tests", + "locale": "en-US", + "linesStartAt1": true, + "columnsStartAt1": true, + "pathFormat": "uri", + "supportsVariableType": true, + "supportsVariablePaging": true, + "supportsRunInTerminalRequest": true, + "supportsMemoryReferences": true, + "supportsProgressReporting": true, + "supportsInvalidatedEvent": true, + "supportsMemoryEvent": true, + "supportsArgsCanBeInterpretedByShell": true, + "supportsStartDebuggingRequest": true, + "supportsANSIStyling": true + })"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(expected->adapterID, "myid"); + EXPECT_EQ(expected->clientID, "myclientid"); + EXPECT_EQ(expected->clientName, "lldb-dap-unit-tests"); + EXPECT_EQ(expected->locale, "en-US"); + EXPECT_EQ(expected->linesStartAt1, true); + EXPECT_EQ(expected->columnsStartAt1, true); + EXPECT_EQ(expected->pathFormat, ePathFormatURI); + EXPECT_EQ(expected->supportedFeatures.contains(eClientFeatureVariableType), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureRunInTerminalRequest), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureMemoryReferences), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureProgressReporting), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureInvalidatedEvent), + true); + EXPECT_EQ(expected->supportedFeatures.contains(eClientFeatureMemoryEvent), + true); + EXPECT_EQ(expected->supportedFeatures.contains( + eClientFeatureArgsCanBeInterpretedByShell), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureStartDebuggingRequest), + true); + EXPECT_EQ(expected->supportedFeatures.contains(eClientFeatureANSIStyling), + true); + + // Check required keys. 
+ EXPECT_THAT_EXPECTED(parse(R"({})"), + FailedWithMessage("missing value at (root).adapterID")); +} diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp index 0126c408d8696..f264fb3ce94e5 100644 --- a/lldb/unittests/Expression/DWARFExpressionTest.cpp +++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp @@ -1217,3 +1217,107 @@ TEST_F(DWARFExpressionMockProcessTestWithAArch, DW_op_deref_no_ptr_fixing) { llvm::Expected result_deref = evaluate_expr(expr_deref); EXPECT_THAT_EXPECTED(result_deref, ExpectLoadAddress(expected_value)); } + +TEST_F(DWARFExpressionMockProcessTest, deref_register) { + TestContext test_ctx; + constexpr uint32_t reg_r0 = 0x504; + MockMemory::Map memory = { + {{0x004, 4}, {0x1, 0x2, 0x3, 0x4}}, + {{0x504, 4}, {0xa, 0xb, 0xc, 0xd}}, + {{0x505, 4}, {0x5, 0x6, 0x7, 0x8}}, + }; + ASSERT_TRUE(CreateTestContext(&test_ctx, "i386-pc-linux", + RegisterValue(reg_r0), memory, memory)); + + ExecutionContext exe_ctx(test_ctx.process_sp); + MockDwarfDelegate delegate = MockDwarfDelegate::Dwarf5(); + auto Eval = [&](llvm::ArrayRef expr_data) { + ExecutionContext exe_ctx(test_ctx.process_sp); + return Evaluate(expr_data, {}, &delegate, &exe_ctx, + test_ctx.reg_ctx_sp.get()); + }; + + // Reads from the register r0. + // Sets the context to RegisterInfo so we know this is a register location. + EXPECT_THAT_EXPECTED(Eval({DW_OP_reg0}), + ExpectScalar(reg_r0, Value::ContextType::RegisterInfo)); + + // Reads from the location(register r0). + // Clears the context so we know this is a value not a location. + EXPECT_THAT_EXPECTED(Eval({DW_OP_reg0, DW_OP_deref}), + ExpectLoadAddress(reg_r0, Value::ContextType::Invalid)); + + // Reads from the location(register r0) and adds the value to the host buffer. + // The evaluator should implicitly convert it to a memory location when + // added to a composite value and should add the contents of memory[r0] + // to the host buffer. 
+ EXPECT_THAT_EXPECTED(Eval({DW_OP_reg0, DW_OP_deref, DW_OP_piece, 4}), + ExpectHostAddress({0xa, 0xb, 0xc, 0xd})); + + // Reads from the location(register r0) and truncates the value to one byte. + // Clears the context so we know this is a value not a location. + EXPECT_THAT_EXPECTED( + Eval({DW_OP_reg0, DW_OP_deref_size, 1}), + ExpectLoadAddress(reg_r0 & 0xff, Value::ContextType::Invalid)); + + // Reads from the location(register r0) and truncates to one byte then adds + // the value to the host buffer. The evaluator should implicitly convert it to + // a memory location when added to a composite value and should add the + // contents of memory[r0 & 0xff] to the host buffer. + EXPECT_THAT_EXPECTED(Eval({DW_OP_reg0, DW_OP_deref_size, 1, DW_OP_piece, 4}), + ExpectHostAddress({0x1, 0x2, 0x3, 0x4})); + + // Reads from the register r0 + 1. + EXPECT_THAT_EXPECTED( + Eval({DW_OP_breg0, 1}), + ExpectLoadAddress(reg_r0 + 1, Value::ContextType::Invalid)); + + // Reads from address r0 + 1, which contains the bytes [5,6,7,8]. + EXPECT_THAT_EXPECTED( + Eval({DW_OP_breg0, 1, DW_OP_deref}), + ExpectLoadAddress(0x08070605, Value::ContextType::Invalid)); +} + +TEST_F(DWARFExpressionMockProcessTest, deref_implicit_value) { + TestContext test_ctx; + MockMemory::Map memory = { + {{0x4, 1}, {0x1}}, + {{0x4, 4}, {0x1, 0x2, 0x3, 0x4}}, + }; + ASSERT_TRUE(CreateTestContext(&test_ctx, "i386-pc-linux", {}, memory)); + + ExecutionContext exe_ctx(test_ctx.process_sp); + MockDwarfDelegate delegate = MockDwarfDelegate::Dwarf5(); + auto Eval = [&](llvm::ArrayRef expr_data) { + ExecutionContext exe_ctx(test_ctx.process_sp); + return Evaluate(expr_data, {}, &delegate, &exe_ctx, + test_ctx.reg_ctx_sp.get()); + }; + + // Creates an implicit location with a value of 4. + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4, DW_OP_stack_value}), + ExpectScalar(0x4)); + + // Creates an implicit location with a value of 4. 
The deref reads the value + // out of the location and implicitly converts it to a load address. + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4, DW_OP_stack_value, DW_OP_deref}), + ExpectLoadAddress(0x4)); + + // Creates an implicit location with a value of 0x504 (uleb128(0x504) = + // 0xa84). The deref reads the low byte out of the location and implicitly + // converts it to a load address. + EXPECT_THAT_EXPECTED( + Eval({DW_OP_constu, 0x84, 0xa, DW_OP_stack_value, DW_OP_deref_size, 1}), + ExpectLoadAddress(0x4)); + + // The tests below are similar to the ones above, but there is no implicit + // location created by a stack_value operation. They are provided here as a + // reference to contrast with the above tests. + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4}), ExpectLoadAddress(0x4)); + + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4, DW_OP_deref}), + ExpectLoadAddress(0x04030201)); + + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4, DW_OP_deref_size, 1}), + ExpectLoadAddress(0x01)); +} diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index a63b740d9472f..5694aeeff3e5b 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -136,6 +136,11 @@ lldb_private::python::LLDBSWIGPython_CastPyObjectToSBStream(PyObject *data) { return nullptr; } +void * +lldb_private::python::LLDBSWIGPython_CastPyObjectToSBThread(PyObject *data) { + return nullptr; +} + void * lldb_private::python::LLDBSWIGPython_CastPyObjectToSBFrame(PyObject *data) { return nullptr; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 569407963695e..483afb426fa10 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -35,9 +35,6 @@ class LLVM_ABI InstructionSelector : public 
GIMatchTableExecutor { /// !isPreISelGenericOpcode(I.getOpcode()) virtual bool select(MachineInstr &I) = 0; - // FIXME: Eliminate dependency on TargetPassConfig for NewPM transition - const TargetPassConfig *TPC = nullptr; - MachineOptimizationRemarkEmitter *MORE = nullptr; /// Note: InstructionSelect does not track changed instructions. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h index 076c70d21bbdf..6060bb6144c62 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -510,9 +510,6 @@ class RegBankSelect : public MachineFunctionPass { /// Optimization mode of the pass. Mode OptMode; - /// Current target configuration. Controls how the pass handles errors. - const TargetPassConfig *TPC; - /// Assign the register bank of each operand of \p MI. /// \return True on success, false otherwise. bool assignInstr(MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index e1aa8eceefd3f..da2742e089f8f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -155,12 +155,10 @@ LLVM_ABI bool isTriviallyDead(const MachineInstr &MI, /// Report an ISel error as a missed optimization remark to the LLVMContext's /// diagnostic stream. Set the FailedISel MachineFunction property. LLVM_ABI void reportGISelFailure(MachineFunction &MF, - const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R); LLVM_ABI void reportGISelFailure(MachineFunction &MF, - const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, const char *PassName, StringRef Msg, const MachineInstr &MI); @@ -168,7 +166,6 @@ LLVM_ABI void reportGISelFailure(MachineFunction &MF, /// Report an ISel warning as a missed optimization remark to the LLVMContext's /// diagnostic stream. 
LLVM_ABI void reportGISelWarning(MachineFunction &MF, - const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index b2697c81fd825..149366c69bdcc 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1243,7 +1243,7 @@ class LLVM_ABI TargetLoweringBase { /// to a MemIntrinsicNode (touches memory). If this is the case, it returns /// true and store the intrinsic information into the IntrinsicInfo that was /// passed to the function. - virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, + virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallBase &, MachineFunction &, unsigned /*Intrinsic*/) const { return false; diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 3787e2591a4c1..3b475c8d5614d 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -972,6 +972,13 @@ def int_arm_mve_vmaxnma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>; +def int_arm_mve_vminnm: DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_vmaxnm: DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + multiclass MVEPredicated rets, list params, LLVMType pred = llvm_anyvector_ty, list props = [IntrNoMem], diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 60d3535ea6097..5f6718d6cbcd8 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -13998,7 +13998,15 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, } namespace llvm { -raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::LoopDisposition LD) 
{ +// Note: these overloaded operators need to be in the llvm namespace for them +// to be resolved correctly. If we put them outside the llvm namespace, the +// +// OS << ": " << SE.getLoopDisposition(SV, InnerL); +// +// code below "breaks" and start printing raw enum values as opposed to the +// string values. +static raw_ostream &operator<<(raw_ostream &OS, + ScalarEvolution::LoopDisposition LD) { switch (LD) { case ScalarEvolution::LoopVariant: OS << "Variant"; @@ -14013,7 +14021,8 @@ raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::LoopDisposition LD) { return OS; } -raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::BlockDisposition BD) { +static raw_ostream &operator<<(raw_ostream &OS, + llvm::ScalarEvolution::BlockDisposition BD) { switch (BD) { case ScalarEvolution::DoesNotDominateBlock: OS << "DoesNotDominate"; diff --git a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp index bce41f9f5329e..4e422539ff9f6 100644 --- a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp @@ -29,8 +29,6 @@ class Type; using namespace llvm; -namespace { - static inline int sizeOfSCEV(const SCEV *S) { struct FindSCEVSize { int Size = 0; @@ -52,8 +50,6 @@ static inline int sizeOfSCEV(const SCEV *S) { return F.Size; } -} // namespace - // Computes the Quotient and Remainder of the division of Numerator by // Denominator. 
void SCEVDivision::divide(ScalarEvolution &SE, const SCEV *Numerator, diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index dbceb8e557849..eb8650fd0eb60 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5877,6 +5877,12 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, break; } case Instruction::ShuffleVector: { + // Handle vector splat idiom + if (Value *Splat = getSplatValue(V)) { + computeKnownFPClass(Splat, Known, InterestedClasses, Q, Depth + 1); + break; + } + // For undef elements, we don't know anything about the common state of // the shuffle result. APInt DemandedLHS, DemandedRHS; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 88e554244c663..ad821231035f0 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -111,17 +111,18 @@ INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) static void reportTranslationError(MachineFunction &MF, - const TargetPassConfig &TPC, OptimizationRemarkEmitter &ORE, OptimizationRemarkMissed &R) { MF.getProperties().setFailedISel(); + bool IsGlobalISelAbortEnabled = + MF.getTarget().Options.GlobalISelAbort == GlobalISelAbortMode::Enable; // Print the function name explicitly if we don't have a debug location (which // makes the diagnostic less useful) or if we're going to emit a raw error. 
- if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled()) + if (!R.getLocation().isValid() || IsGlobalISelAbortEnabled) R << (" (in function: " + MF.getName() + ")").str(); - if (TPC.isGlobalISelAbortEnabled()) + if (IsGlobalISelAbortEnabled) report_fatal_error(Twine(R.getMsg())); else ORE.emit(R); @@ -242,7 +243,7 @@ ArrayRef IRTranslator::getOrCreateVRegs(const Value &Val) { MF->getFunction().getSubprogram(), &MF->getFunction().getEntryBlock()); R << "unable to translate constant: " << ore::NV("Type", Val.getType()); - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return *VRegs; } } @@ -279,7 +280,7 @@ Align IRTranslator::getMemOpAlign(const Instruction &I) { OptimizationRemarkMissed R("gisel-irtranslator", "", &I); R << "unable to translate memop: " << ore::NV("Opcode", &I); - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return Align(1); } @@ -4150,7 +4151,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); R << "unable to translate in big endian mode"; - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } @@ -4194,7 +4195,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower function: " << ore::NV("Prototype", F.getFunctionType()); - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } @@ -4217,7 +4218,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower arguments: " << ore::NV("Prototype", F.getFunctionType()); - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } @@ -4268,7 +4269,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { R << 
": '" << InstStrStorage << "'"; } - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } @@ -4276,7 +4277,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", BB->getTerminator()->getDebugLoc(), BB); R << "unable to translate basic block"; - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } } diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 2dd22c8a7e8ba..1d281ab83aacc 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -137,7 +137,6 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; ISel = MF.getSubtarget().getInstructionSelector(); - ISel->TPC = &getAnalysis(); // FIXME: Properly override OptLevel in TargetMachine. See OptLevelChanger CodeGenOptLevel OldOptLevel = OptLevel; @@ -159,7 +158,6 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); assert(ISel && "Cannot work without InstructionSelector"); - const TargetPassConfig &TPC = *ISel->TPC; CodeGenCoverage CoverageInfo; ISel->setupMF(MF, VT, &CoverageInfo, PSI, BFI); @@ -177,8 +175,8 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { // property check already is. if (!DisableGISelLegalityCheck) if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { - reportGISelFailure(MF, TPC, MORE, "gisel-select", - "instruction is not legal", *MI); + reportGISelFailure(MF, MORE, "gisel-select", "instruction is not legal", + *MI); return false; } // FIXME: We could introduce new blocks and will need to fix the outer loop. 
@@ -215,8 +213,7 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { if (!selectInstr(MI)) { LLVM_DEBUG(dbgs() << "Selection failed!\n"; MIIMaintainer.reportFullyCreatedInstrs()); - reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", - MI); + reportGISelFailure(MF, MORE, "gisel-select", "cannot select", MI); return false; } LLVM_DEBUG(MIIMaintainer.reportFullyCreatedInstrs()); @@ -279,7 +276,7 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg); if (!RC) { - reportGISelFailure(MF, TPC, MORE, "gisel-select", + reportGISelFailure(MF, MORE, "gisel-select", "VReg has no regclass after selection", *MI); return false; } @@ -288,7 +285,7 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { if (Ty.isValid() && TypeSize::isKnownGT(Ty.getSizeInBits(), TRI.getRegSizeInBits(*RC))) { reportGISelFailure( - MF, TPC, MORE, "gisel-select", + MF, MORE, "gisel-select", "VReg's low-level type and register class have different sizes", *MI); return false; } @@ -299,7 +296,7 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { MF.getFunction().getSubprogram(), /*MBB=*/nullptr); R << "inserting blocks is not supported yet"; - reportGISelFailure(MF, TPC, MORE, R); + reportGISelFailure(MF, MORE, R); return false; } #endif diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index aef16b5f33af4..0f0656aaa4f45 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -348,7 +348,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { *MIRBuilder, VT); if (Result.FailedOn) { - reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + reportGISelFailure(MF, MORE, "gisel-legalize", "unable to legalize instruction", *Result.FailedOn); return false; } @@ -360,7 +360,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { R << "lost " << 
ore::NV("NumLostDebugLocs", LocObserver.getNumLostDebugLocs()) << " debug locations during pass"; - reportGISelWarning(MF, TPC, MORE, R); + reportGISelWarning(MF, MORE, R); // Example remark: // --- !Missed // Pass: gisel-legalize diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index bcb4f1c551cfd..5db631be32acd 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include #include #include @@ -83,7 +84,6 @@ void RegBankSelect::init(MachineFunction &MF) { assert(RBI && "Cannot work without RegisterBankInfo"); MRI = &MF.getRegInfo(); TRI = MF.getSubtarget().getRegisterInfo(); - TPC = &getAnalysis(); if (OptMode != Mode::Fast) { MBFI = &getAnalysis().getMBFI(); MBPI = &getAnalysis().getMBPI(); @@ -308,7 +308,8 @@ const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( RepairPts.emplace_back(std::move(RepairPt)); } } - if (!BestMapping && !TPC->isGlobalISelAbortEnabled()) { + if (!BestMapping && MI.getMF()->getTarget().Options.GlobalISelAbort != + GlobalISelAbortMode::Enable) { // If none of the mapping worked that means they are all impossible. // Thus, pick the first one and set an impossible repairing point. // It will trigger the failed isel mode. 
@@ -708,7 +709,7 @@ bool RegBankSelect::assignRegisterBanks(MachineFunction &MF) { continue; if (!assignInstr(MI)) { - reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", + reportGISelFailure(MF, *MORE, "gisel-regbankselect", "unable to map instruction", MI); return false; } @@ -722,7 +723,7 @@ bool RegBankSelect::checkFunctionIsLegal(MachineFunction &MF) const { #ifndef NDEBUG if (!DisableGISelLegalityCheck) { if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { - reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", + reportGISelFailure(MF, *MORE, "gisel-regbankselect", "instruction is not legal", *MI); return false; } diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index bc01cb65c4a69..15e81f5773b69 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -234,11 +234,11 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, static void reportGISelDiagnostic(DiagnosticSeverity Severity, MachineFunction &MF, - const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R) { - bool IsFatal = Severity == DS_Error && - TPC.isGlobalISelAbortEnabled(); + bool IsGlobalISelAbortEnabled = + MF.getTarget().Options.GlobalISelAbort == GlobalISelAbortMode::Enable; + bool IsFatal = Severity == DS_Error && IsGlobalISelAbortEnabled; // Print the function name explicitly if we don't have a debug location (which // makes the diagnostic less useful) or if we're going to emit a raw error. 
if (!R.getLocation().isValid() || IsFatal) @@ -250,20 +250,20 @@ static void reportGISelDiagnostic(DiagnosticSeverity Severity, MORE.emit(R); } -void llvm::reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, +void llvm::reportGISelWarning(MachineFunction &MF, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R) { - reportGISelDiagnostic(DS_Warning, MF, TPC, MORE, R); + reportGISelDiagnostic(DS_Warning, MF, MORE, R); } -void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, +void llvm::reportGISelFailure(MachineFunction &MF, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R) { MF.getProperties().setFailedISel(); - reportGISelDiagnostic(DS_Error, MF, TPC, MORE, R); + reportGISelDiagnostic(DS_Error, MF, MORE, R); } -void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, +void llvm::reportGISelFailure(MachineFunction &MF, MachineOptimizationRemarkEmitter &MORE, const char *PassName, StringRef Msg, const MachineInstr &MI) { @@ -271,9 +271,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, MI.getDebugLoc(), MI.getParent()); R << Msg; // Printing MI is expensive; only do it if expensive remarks are enabled. 
- if (TPC.isGlobalISelAbortEnabled() || MORE.allowExtraAnalysis(PassName)) + if (MF.getTarget().Options.GlobalISelAbort == GlobalISelAbortMode::Enable || + MORE.allowExtraAnalysis(PassName)) R << ": " << ore::MNV("Inst", MI); - reportGISelFailure(MF, TPC, MORE, R); + reportGISelFailure(MF, MORE, R); } unsigned llvm::getInverseGMinMaxOpcode(unsigned MinMaxOpc) { diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a02af59600c44..4e242311e290f 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/StackSafetyAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d3d57b99a6345..a83185d6ade20 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17227,7 +17227,7 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, template static bool setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, - AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) { + AArch64TargetLowering::IntrinsicInfo &Info, const CallBase &CI) { Info.opc = ISD::INTRINSIC_VOID; // Retrieve EC from first vector argument. const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType()); @@ -17252,7 +17252,7 @@ setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. 
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { auto &DL = I.getDataLayout(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index b6d34f97c7b48..1d4446d287462 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -206,7 +206,7 @@ class AArch64TargetLowering : public TargetLowering { EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5dea64844e64e..215ef67a9bc4f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2124,7 +2124,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureBVHDualAndBVH8Insts, FeatureWaitsBeforeSystemScopeStores, FeatureD16Writes32BitVgpr, - FeatureCubeInsts, + FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, @@ -2137,7 +2137,6 @@ def FeatureISAVersion12_50_Common : FeatureSet< [FeatureGFX12, FeatureGFX1250Insts, FeatureRequiresAlignedVGPRs, - FeatureAddressableLocalMemorySize327680, FeatureCuMode, Feature1024AddressableVGPRs, Feature64BitLiterals, @@ -2206,17 +2205,18 @@ def FeatureISAVersion12_50_Common : FeatureSet< FeatureXNACK, FeatureClusters, FeatureD16Writes32BitVgpr, -]>; - -def FeatureISAVersion12_50 : FeatureSet< - !listconcat(FeatureISAVersion12_50_Common.Features, - [FeatureCubeInsts, + FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, - FeatureCvtPkNormVOP3Insts])>; + FeatureCvtPkNormVOP3Insts +]>; + +def FeatureISAVersion12_50 : FeatureSet< + 
!listconcat(FeatureISAVersion12_50_Common.Features, + [FeatureAddressableLocalMemorySize327680])>; def FeatureISAVersion12_51 : FeatureSet< !listconcat(FeatureISAVersion12_50.Features, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 78a3ec7f0c266..8698e816ddbb9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -4451,16 +4451,14 @@ bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const { const auto *Ld = cast(N); const MachineMemOperand *MMO = Ld->getMemOperand(); - if (Ld->isDivergent()) { - // FIXME: We ought to able able to take the direct isDivergent result. We - // cannot rely on the MMO for a uniformity check, and should stop using - // it. This is a hack for 2 ways that the IR divergence analysis is superior - // to the DAG divergence: Recognizing shift-of-workitem-id as always - // uniform, and isSingleLaneExecution. These should be handled in the DAG - // version, and then this can be dropped. - if (!MMO->getValue() || !AMDGPU::isUniformMMO(MMO)) - return false; - } + // FIXME: We ought to able able to take the direct isDivergent result. We + // cannot rely on the MMO for a uniformity check, and should stop using + // it. This is a hack for 2 ways that the IR divergence analysis is superior + // to the DAG divergence: Recognizing shift-of-workitem-id as always + // uniform, and isSingleLaneExecution. These should be handled in the DAG + // version, and then this can be dropped. 
+ if (Ld->isDivergent() && !AMDGPU::isUniformMMO(MMO)) + return false; return MMO->getSize().hasValue() && Ld->getAlign() >= diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index b7b87674ee658..2b1f4048947bf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -35,15 +35,13 @@ bool AMDGPU::isUniformMMO(const MachineMemOperand *MMO) { PSV->isJumpTable(); } - // FIXME: null value is should be treated as unknown, not as uniform. - return true; + // Unknown value. + return false; } // UndefValue means this is a load of a kernel input. These are uniform. // Sometimes LDS instructions have constant pointers. - // If Ptr is null, then that means this mem operand contains a - // PseudoSourceValue like GOT. - if (!Ptr || isa(Ptr)) + if (isa(Ptr)) return true; if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 6e41134bb3bc1..ae62dbe1cc706 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2362,7 +2362,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture( if (!loadInputValue(QueuePtr, B, AMDGPUFunctionArgInfo::QUEUE_PTR)) return Register(); - // TODO: can we be smarter about machine pointer info? 
+ // TODO: Use custom PseudoSourceValue MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); // Offset into amd_queue_t for group_segment_aperture_base_hi / diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index 5c80a9762ff69..11cafdec8d3c3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -100,8 +100,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl { /// Compute the register class constraints based on the uses of \p Reg, /// excluding MFMA uses from which can be rewritten to change the register - /// class constraint. This should be nearly identical to - /// MachineRegisterInfo::recomputeRegClass. + /// class constraint. MFMA scale operands need to be constraint checked. + /// This should be nearly identical to MachineRegisterInfo::recomputeRegClass. /// \p RewriteCandidates will collect the set of MFMA instructions that need /// to have the opcode mutated to perform the replacement. @@ -155,9 +155,16 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable( // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the // effects of rewrite candidates. It just so happens that we can use - // either AGPR or VGPR in src0/src1, so don't bother checking the - // constraint effects of the individual operands. + // either AGPR or VGPR in src0/src1. We still need to check constraint + // effects for scale variant, which does not allow AGPR. 
if (isRewriteCandidate(*MI)) { + int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); + const MCInstrDesc &AGPRDesc = TII.get(AGPROp); + const TargetRegisterClass *NewRC = + TII.getRegClass(AGPRDesc, MO.getOperandNo()); + if (!TRI.hasAGPRs(NewRC)) + return false; + const MachineOperand *VDst = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst); const MachineOperand *Src2 = diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 55370a54f07a0..70f8c0cc2ae9c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1309,7 +1309,7 @@ static unsigned getIntrMemWidth(unsigned IntrID) { } } -static void getCoopAtomicOperandsInfo(const CallInst &CI, bool IsLoad, +static void getCoopAtomicOperandsInfo(const CallBase &CI, bool IsLoad, TargetLoweringBase::IntrinsicInfo &Info) { Value *OrderingArg = CI.getArgOperand(IsLoad ? 1 : 2); unsigned Ord = cast(OrderingArg)->getZExtValue(); @@ -1339,7 +1339,7 @@ static void getCoopAtomicOperandsInfo(const CallInst &CI, bool IsLoad, } bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &CI, + const CallBase &CI, MachineFunction &MF, unsigned IntrID) const { Info.flags = MachineMemOperand::MONone; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 40c03ca024c6c..fb162948caf4c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -334,7 +334,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { MVT getPointerTy(const DataLayout &DL, unsigned AS) const override; MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, + bool getTgtMemIntrinsic(IntrinsicInfo &, const CallBase &, MachineFunction &MF, unsigned IntrinsicID) const override; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp 
index 1a8c470600394..2d26c67a8077a 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -20665,7 +20665,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 8191eb40a712a..d0fb58c764edd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -315,8 +315,7 @@ class VectorType; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize = false) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index f9aaacb7f5250..097318711d137 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -393,6 +393,12 @@ def vsub : PatFrags<(ops node:$lhs, node:$rhs), def vmul : PatFrags<(ops node:$lhs, node:$rhs), [(fmul node:$lhs, node:$rhs), (int_arm_mve_vmul node:$lhs, node:$rhs)]>; +def vminnm : PatFrags<(ops node:$lhs, node:$rhs), + [(fminnum node:$lhs, node:$rhs), + (int_arm_mve_vminnm node:$lhs, node:$rhs)]>; +def vmaxnm : PatFrags<(ops node:$lhs, node:$rhs), + [(fmaxnum node:$lhs, node:$rhs), + (int_arm_mve_vmaxnm node:$lhs, node:$rhs)]>; // --------- Start of base classes for the instructions themselves @@ -1489,7 +1495,7 @@ class MVE_VMINMAXNM sz, bit bit_21, let validForTailPredication = 1; } -multiclass MVE_VMINMAXNM_m { +multiclass MVE_VMINMAXNM_m { def "" : MVE_VMINMAXNM; let Predicates = [HasMVEFloat] in { 
@@ -1497,10 +1503,10 @@ multiclass MVE_VMINMAXNM_m; -defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>; -defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>; -defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>; +defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, vmaxnm, int_arm_mve_max_predicated>; +defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, vmaxnm, int_arm_mve_max_predicated>; +defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, vminnm, int_arm_mve_min_predicated>; +defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, vminnm, int_arm_mve_min_predicated>; class MVE_VMINMAX size, @@ -4148,7 +4154,7 @@ class MVE_VMAXMINNMA size, bit bit_12, } multiclass MVE_VMAXMINNMA_m { def "" : MVE_VMAXMINNMA; defvar Inst = !cast(NAME); @@ -4168,13 +4174,13 @@ multiclass MVE_VMAXMINNMA_m - : MVE_VMAXMINNMA_m<"vmaxnma", VTI, fmaxnum, int_arm_mve_vmaxnma_predicated, bit_12>; + : MVE_VMAXMINNMA_m<"vmaxnma", VTI, vmaxnm, int_arm_mve_vmaxnma_predicated, bit_12>; defm MVE_VMAXNMAf32 : MVE_VMAXNMA; defm MVE_VMAXNMAf16 : MVE_VMAXNMA; multiclass MVE_VMINNMA - : MVE_VMAXMINNMA_m<"vminnma", VTI, fminnum, int_arm_mve_vminnma_predicated, bit_12>; + : MVE_VMAXMINNMA_m<"vminnma", VTI, vminnm, int_arm_mve_vminnma_predicated, bit_12>; defm MVE_VMINNMAf32 : MVE_VMINNMA; defm MVE_VMINNMAf16 : MVE_VMINNMA; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 5767a74513e8d..bae9d705f5a7a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2115,7 +2115,7 @@ static Value *getUnderLyingObjectForBrevLdIntr(Value *V) { /// true and store the intrinsic information into the IntrinsicInfo that was /// passed to the function. 
bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index f4d2a79051c10..cde8b5ba8d8a7 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -145,7 +145,7 @@ class HexagonTargetLowering : public TargetLowering { const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index ba9d0682b26dd..32ea2198f7898 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -8912,7 +8912,7 @@ bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { } bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 0c09fb6afd2d1..5277e7e3e74ca 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -78,7 +78,7 @@ class LoongArchTargetLowering : public TargetLowering { Value *NewVal, Value *Mask, AtomicOrdering Ord) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp 
b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8b72b1e1f3a52..5081a093d4c34 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -4077,9 +4077,10 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint( // because we need the information that is only available in the "Value" type // of destination // pointer. In particular, the address space information. -bool NVPTXTargetLowering::getTgtMemIntrinsic( - IntrinsicInfo &Info, const CallInst &I, - MachineFunction &MF, unsigned Intrinsic) const { +bool NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallBase &I, + MachineFunction &MF, + unsigned Intrinsic) const { switch (Intrinsic) { default: return false; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index dd8e49de7aa6a..cb0a1aa5dc892 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -32,7 +32,7 @@ class NVPTXTargetLowering : public TargetLowering { const NVPTXSubtarget &STI); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 1a9310c46cd1d..51212837fbb17 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -18495,7 +18495,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { } bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 74af055ed5d30..daae839479c3c 
100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -492,8 +492,7 @@ namespace llvm { bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 3d5a55c631301..1e5d0a4297465 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -1569,7 +1569,7 @@ bool RISCVInstructionSelector::selectAddr(MachineInstr &MI, switch (TM.getCodeModel()) { default: { - reportGISelFailure(*MF, *TPC, *MORE, getName(), + reportGISelFailure(*MF, *MORE, getName(), "Unsupported code model for lowering", MI); return false; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 039bd55718e24..ab2652eac3823 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1868,7 +1868,7 @@ bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const { } bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { auto &DL = I.getDataLayout(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 69fcada6494a2..8a55a5634452c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -35,7 +35,7 @@ class RISCVTargetLowering : public TargetLowering { const RISCVSubtarget &getSubtarget() const { return Subtarget; } - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase 
&I, MachineFunction &MF, unsigned Intrinsic) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index ae81d38579c18..0fb44052527f0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -883,10 +883,12 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems, .addUse(NumElementsVReg); }); } else { - assert(ST.isShader() && "Runtime arrays are not allowed in non-shader " - "SPIR-V modules."); - if (!ST.isShader()) + if (!ST.isShader()) { + llvm::reportFatalUsageError( + "Runtime arrays are not allowed in non-shader " + "SPIR-V modules"); return nullptr; + } ArrayType = createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) { return MIRBuilder.buildInstr(SPIRV::OpTypeRuntimeArray) .addDef(createTypeVReg(MIRBuilder)) diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp index 0ba6589c68944..36fa5fa9a70cb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp @@ -94,7 +94,7 @@ MVT SPIRVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, } bool SPIRVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { unsigned AlignIdx = 3; diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h index 3d31a116bad4a..5746832c8fd95 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h @@ -48,7 +48,7 @@ class SPIRVTargetLowering : public TargetLowering { EVT VT) const override; MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool 
getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 98cb7aba562c4..e0c527b9b2581 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1060,7 +1060,7 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, } bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index f7052989b3c75..c37970f458e36 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -58,7 +58,7 @@ class WebAssemblyTargetLowering final : public TargetLowering { bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td index eb692679f5971..991507e883f28 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td @@ -109,6 +109,10 @@ def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>; def : Pat<(shl I64:$lhs, (zext (and I32:$rhs, 63))), (SHL_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>; +def : Pat<(sra I64:$lhs, (zext (and I32:$rhs, 63))), + (SHR_S_I64 I64:$lhs, (I64_EXTEND_U_I32 
I32:$rhs))>; +def : Pat<(srl I64:$lhs, (zext (and I32:$rhs, 63))), + (SHR_U_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>; defm SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond), (outs), (ins), diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9da121dd9ab87..d46f0f1572f1f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3104,7 +3104,7 @@ static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) { } bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { Info.flags = MachineMemOperand::MONone; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index c5085299716ed..848fe4bf86d2c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1482,7 +1482,7 @@ namespace llvm { /// to a MemIntrinsicNode (touches memory). If this is the case, it returns /// true and stores the intrinsic information into the IntrinsicInfo that was /// passed to the function. - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 001215abcfb26..3af67ff6ac3f5 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6024,33 +6024,34 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, DeadInsts.emplace_back(OperandIsInstr); } -// Trying to hoist the IVInc to loop header if all IVInc users are in -// the loop header. It will help backend to generate post index load/store -// when the latch block is different from loop header block. 
-static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, - const LSRUse &LU, Instruction *IVIncInsertPos, - Loop *L) { +// Determine where to insert the transformed IV increment instruction for this +// fixup. By default this is the default insert position, but if this is a +// postincrement opportunity then we try to insert it in the same block as the +// fixup user instruction, as this is needed for a postincrement instruction to +// be generated. +static Instruction *getFixupInsertPos(const TargetTransformInfo &TTI, + const LSRFixup &Fixup, const LSRUse &LU, + Instruction *IVIncInsertPos, + DominatorTree &DT) { + // Only address uses can be postincremented if (LU.Kind != LSRUse::Address) - return false; - - // For now this code do the conservative optimization, only work for - // the header block. Later we can hoist the IVInc to the block post - // dominate all users. - BasicBlock *LHeader = L->getHeader(); - if (IVIncInsertPos->getParent() == LHeader) - return false; - - if (!Fixup.OperandValToReplace || - any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) { - Instruction *UI = cast(U); - return UI->getParent() != LHeader; - })) - return false; + return IVIncInsertPos; + // Don't try to postincrement if it's not legal Instruction *I = Fixup.UserInst; Type *Ty = I->getType(); - return (isa(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) || - (isa(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)); + if (!(isa(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) && + !(isa(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty))) + return IVIncInsertPos; + + // It's only legal to hoist to the user block if it dominates the default + // insert position. 
+ BasicBlock *HoistBlock = I->getParent(); + BasicBlock *IVIncBlock = IVIncInsertPos->getParent(); + if (!DT.dominates(I, IVIncBlock)) + return IVIncInsertPos; + + return HoistBlock->getTerminator(); } /// Rewrite all the fixup locations with new values, following the chosen @@ -6071,9 +6072,7 @@ void LSRInstance::ImplementSolution( for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { Instruction *InsertPos = - canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L) - ? L->getHeader()->getTerminator() - : IVIncInsertPos; + getFixupInsertPos(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, DT); Rewriter.setIVIncInsertPos(L, InsertPos); Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); Changed = true; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index e3dceb7677043..57ba35936f3f7 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3150,7 +3150,6 @@ class AllocaSliceRewriter : public InstVisitor { assert(IsSplit || BeginOffset == NewBeginOffset); uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; -#ifndef NDEBUG StringRef OldName = OldPtr->getName(); // Skip through the last '.sroa.' component of the name. size_t LastSROAPrefix = OldName.rfind(".sroa."); @@ -3169,17 +3168,10 @@ class AllocaSliceRewriter : public InstVisitor { } // Strip any SROA suffixes as well. OldName = OldName.substr(0, OldName.find(".sroa_")); -#endif return getAdjustedPtr(IRB, DL, &NewAI, APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset), - PointerTy, -#ifndef NDEBUG - Twine(OldName) + "." 
-#else - Twine() -#endif - ); + PointerTy, Twine(OldName) + "."); } /// Compute suitable alignment to access this slice of the *new* diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9b727a7998392..9a94d29ba3307 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5122,8 +5122,18 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { InstructionCost C = getInstructionCost(&I, VF); // Check if we should override the cost. - if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0) - C = InstructionCost(ForceTargetInstructionCost); + if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0) { + // For interleave groups, use ForceTargetInstructionCost once for the + // whole group. + if (VF.isVector() && getWideningDecision(&I, VF) == CM_Interleave) { + if (getInterleavedAccessGroup(&I)->getInsertPos() == &I) + C = InstructionCost(ForceTargetInstructionCost); + else + C = InstructionCost(0); + } else { + C = InstructionCost(ForceTargetInstructionCost); + } + } BlockCost += C; LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 6491a2ce6813b..422204ff3f292 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -280,7 +280,6 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) { if (UI && Ctx.skipCostComputation(UI, VF.isVector())) { RecipeCost = 0; } else { - RecipeCost = computeCost(VF, Ctx); RecipeCost = computeCost(VF, Ctx); if (ForceTargetInstructionCost.getNumOccurrences() > 0 && RecipeCost.isValid()) { diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll index 
ffae5d38a7d8f..9a9a6a7d45931 100644 --- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll +++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll @@ -167,3 +167,236 @@ loop: exit: ret void } + +define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) { +; CHECK-LABEL: 'test_step2_start_outer_add_rec_step_16' +; CHECK-NEXT: Classifying expressions for: @test_step2_start_outer_add_rec_step_16 +; CHECK-NEXT: %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] +; CHECK-NEXT: --> {0,+,16}<%outer.header> U: [0,-15) S: [-9223372036854775808,9223372036854775793) Exits: <> LoopDispositions: { %outer.header: Computable, %loop: Invariant } +; CHECK-NEXT: %iv = phi i64 [ %outer.iv, %outer.header ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.0 = udiv i64 %iv, 4 +; CHECK-NEXT: --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.1 = add i64 %iv, 1 +; CHECK-NEXT: --> {{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 +; CHECK-NEXT: --> ({{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.2 = add i64 %iv, 2 +; CHECK-NEXT: --> {{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 +; CHECK-NEXT: --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: 
[0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.3 = add i64 %iv, 3 +; CHECK-NEXT: --> {{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4 +; CHECK-NEXT: --> ({{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.4 = add i64 %iv, 4 +; CHECK-NEXT: --> {{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4 +; CHECK-NEXT: --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.5 = add i64 %iv, 5 +; CHECK-NEXT: --> {{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4 +; CHECK-NEXT: --> ({{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1 +; CHECK-NEXT: --> {{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4 +; CHECK-NEXT: --> ({{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3 +; CHECK-NEXT: --> 
({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3 +; CHECK-NEXT: --> ({{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517206) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.2 = udiv i64 %iv.2, 3 +; CHECK-NEXT: --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.4 = udiv i64 %iv.4, 3 +; CHECK-NEXT: --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.5 = udiv i64 %iv.5, 3 +; CHECK-NEXT: --> ({{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517206) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.next = add i64 %iv, 2 +; CHECK-NEXT: --> {{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %outer.iv.next = add i64 %outer.iv, 16 +; CHECK-NEXT: --> {16,+,16}<%outer.header> U: [0,-15) S: [-9223372036854775808,9223372036854775793) Exits: <> LoopDispositions: { %outer.header: Computable, %loop: Invariant } +; CHECK-NEXT: Determining loop execution counts for: @test_step2_start_outer_add_rec_step_16 +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Unpredictable backedge-taken count. 
+; CHECK-NEXT: Loop %outer.header: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Predicated backedge-taken count is (%m /u 16) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i4 (trunc i64 %m to i4) to i64) == 0 +; CHECK-NEXT: Loop %outer.header: Predicated constant max backedge-taken count is i64 1152921504606846975 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i4 (trunc i64 %m to i4) to i64) == 0 +; CHECK-NEXT: Loop %outer.header: Predicated symbolic max backedge-taken count is (%m /u 16) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i4 (trunc i64 %m to i4) to i64) == 0 +; +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + br label %loop + +loop: + %iv = phi i64 [ %outer.iv, %outer.header ], [ %iv.next, %loop ] + %div.0 = udiv i64 %iv, 4 + call void @use(i64 %div.0) + %iv.1 = add i64 %iv, 1 + %div.1 = udiv i64 %iv.1, 4 + call void @use(i64 %div.1) + %iv.2 = add i64 %iv, 2 + %div.2 = udiv i64 %iv.2, 4 + call void @use(i64 %div.2) + %iv.3 = add i64 %iv, 3 + %div.3 = udiv i64 %iv.3, 4 + call void @use(i64 %div.3) + %iv.4 = add i64 %iv, 4 + %div.4 = udiv i64 %iv.4, 4 + call void @use(i64 %div.4) + %iv.5 = add i64 %iv, 5 + %div.5 = udiv i64 %iv.5, 4 + call void @use(i64 %div.5) + %iv.neg.1 = add i64 %iv, -1 + %div.neg.1 = udiv i64 %iv.neg.1, 4 + call void @use(i64 %div.neg.1) + %div3.0 = udiv i64 %iv, 3 + call void @use(i64 %div3.0) + %div3.1 = udiv i64 %iv.1,3 + call void @use(i64 %div3.1) + %div3.2 = udiv i64 %iv.2, 3 + call void @use(i64 %div3.2) + %div3.4 = udiv i64 %iv.4, 3 + call void @use(i64 %div3.4) + %div3.5 = udiv i64 %iv.5, 3 + call void @use(i64 %div3.5) + %iv.next = add i64 %iv, 2 + %cond = icmp slt i64 %iv, %n + br i1 %cond, label %loop, label %outer.latch + +outer.latch: + %outer.iv.next = add i64 %outer.iv, 
16 + %outer.ec = icmp eq i64 %outer.iv, %m + br i1 %outer.ec, label %exit, label %outer.header + +exit: + ret void +} + +define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) { +; CHECK-LABEL: 'test_step2_div4_start_outer_add_rec_step_2' +; CHECK-NEXT: Classifying expressions for: @test_step2_div4_start_outer_add_rec_step_2 +; CHECK-NEXT: %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] +; CHECK-NEXT: --> {0,+,2}<%outer.header> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %outer.header: Computable, %loop: Invariant } +; CHECK-NEXT: %iv = phi i64 [ %outer.iv, %outer.header ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.0 = udiv i64 %iv, 4 +; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.1 = add i64 %iv, 1 +; CHECK-NEXT: --> {{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 +; CHECK-NEXT: --> ({{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.2 = add i64 %iv, 2 +; CHECK-NEXT: --> {{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 +; CHECK-NEXT: --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: 
Variant } +; CHECK-NEXT: %iv.3 = add i64 %iv, 3 +; CHECK-NEXT: --> {{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4 +; CHECK-NEXT: --> ({{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.4 = add i64 %iv, 4 +; CHECK-NEXT: --> {{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4 +; CHECK-NEXT: --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.5 = add i64 %iv, 5 +; CHECK-NEXT: --> {{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4 +; CHECK-NEXT: --> ({{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1 +; CHECK-NEXT: --> {{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4 +; CHECK-NEXT: --> ({{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3 +; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: 
Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3 +; CHECK-NEXT: --> ({{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517206) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.2 = udiv i64 %iv.2, 3 +; CHECK-NEXT: --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.4 = udiv i64 %iv.4, 3 +; CHECK-NEXT: --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.5 = udiv i64 %iv.5, 3 +; CHECK-NEXT: --> ({{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517206) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.next = add i64 %iv, 2 +; CHECK-NEXT: --> {{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %outer.iv.next = add i64 %outer.iv, 2 +; CHECK-NEXT: --> {2,+,2}<%outer.header> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %outer.header: Computable, %loop: Invariant } +; CHECK-NEXT: Determining loop execution counts for: @test_step2_div4_start_outer_add_rec_step_2 +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Unpredictable constant max backedge-taken count. 
+; CHECK-NEXT: Loop %outer.header: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Predicated backedge-taken count is (%m /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %m to i1) to i64) == 0 +; CHECK-NEXT: Loop %outer.header: Predicated constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %m to i1) to i64) == 0 +; CHECK-NEXT: Loop %outer.header: Predicated symbolic max backedge-taken count is (%m /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %m to i1) to i64) == 0 +; +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + br label %loop + +loop: + %iv = phi i64 [ %outer.iv, %outer.header ], [ %iv.next, %loop ] + %div.0 = udiv i64 %iv, 4 + call void @use(i64 %div.0) + %iv.1 = add i64 %iv, 1 + %div.1 = udiv i64 %iv.1, 4 + call void @use(i64 %div.1) + %iv.2 = add i64 %iv, 2 + %div.2 = udiv i64 %iv.2, 4 + call void @use(i64 %div.2) + %iv.3 = add i64 %iv, 3 + %div.3 = udiv i64 %iv.3, 4 + call void @use(i64 %div.3) + %iv.4 = add i64 %iv, 4 + %div.4 = udiv i64 %iv.4, 4 + call void @use(i64 %div.4) + %iv.5 = add i64 %iv, 5 + %div.5 = udiv i64 %iv.5, 4 + call void @use(i64 %div.5) + %iv.neg.1 = add i64 %iv, -1 + %div.neg.1 = udiv i64 %iv.neg.1, 4 + call void @use(i64 %div.neg.1) + %div3.0 = udiv i64 %iv, 3 + call void @use(i64 %div3.0) + %div3.1 = udiv i64 %iv.1,3 + call void @use(i64 %div3.1) + %div3.2 = udiv i64 %iv.2, 3 + call void @use(i64 %div3.2) + %div3.4 = udiv i64 %iv.4, 3 + call void @use(i64 %div3.4) + %div3.5 = udiv i64 %iv.5, 3 + call void @use(i64 %div3.5) + call void @use(i64 %div.neg.1) + %iv.next = add i64 %iv, 2 + %cond = icmp slt i64 %iv, %n + br i1 %cond, label %loop, label %outer.latch + +outer.latch: + %outer.iv.next = add i64 %outer.iv, 2 + %outer.ec = icmp eq i64 %outer.iv, %m + br i1 
%outer.ec, label %exit, label %outer.header + +exit: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll index 32760caa524ec..146720febf486 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; ; SMULH ; -define @smulh_i8( %a, %b) #0 { +define @smulh_i8( %a, %b) { ; CHECK-LABEL: smulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -19,7 +19,7 @@ define @smulh_i8( %a, %b ret %tr } -define @smulh_i16( %a, %b) #0 { +define @smulh_i16( %a, %b) { ; CHECK-LABEL: smulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -33,7 +33,7 @@ define @smulh_i16( %a, % ret %tr } -define @smulh_i32( %a, %b) #0 { +define @smulh_i32( %a, %b) { ; CHECK-LABEL: smulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -47,7 +47,7 @@ define @smulh_i32( %a, % ret %tr } -define @smulh_i64( %a, %b) #0 { +define @smulh_i64( %a, %b) { ; CHECK-LABEL: smulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -65,7 +65,7 @@ define @smulh_i64( %a, % ; UMULH ; -define @umulh_i8( %a, %b) #0 { +define @umulh_i8( %a, %b) { ; CHECK-LABEL: umulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -79,7 +79,7 @@ define @umulh_i8( %a, %b ret %tr } -define @umulh_i16( %a, %b) #0 { +define @umulh_i16( %a, %b) { ; CHECK-LABEL: umulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -93,7 +93,7 @@ define @umulh_i16( %a, % ret %tr } -define @umulh_i32( %a, %b) #0 { +define @umulh_i32( %a, %b) { ; CHECK-LABEL: umulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -107,7 +107,7 @@ define @umulh_i32( %a, % ret %tr } -define @umulh_i64( %a, %b) #0 { +define @umulh_i64( %a, %b) { ; CHECK-LABEL: umulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ 
-121,4 +121,262 @@ define @umulh_i64( %a, % ret %tr } -attributes #0 = { "target-features"="+sve" } + +; Fixed-length 128bits + +define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: smulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %1 = sext <16 x i8> %a to <16 x i16> + %2 = sext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: smulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret + %1 = sext <8 x i16> %a to <8 x i32> + %2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: smulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret + %1 = sext <4 x i32> %a to <4 x i64> + %2 = sext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: smulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: smulh x10, x10, x11 +; CHECK-NEXT: smulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret + %1 = sext <2 x i64> %a to <2 x i128> + %2 = sext 
<2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + +define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: umulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %1 = zext <16 x i8> %a to <16 x i16> + %2 = zext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: umulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret + %1 = zext <8 x i16> %a to <8 x i32> + %2 = zext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: umulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret + %1 = zext <4 x i32> %a to <4 x i64> + %2 = zext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: umulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: umulh x10, x10, x11 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret + %1 = 
zext <2 x i64> %a to <2 x i128> + %2 = zext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + + + +; Fixed-length 64bits + +define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: smulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-NEXT: ret + %1 = sext <8 x i8> %a to <8 x i16> + %2 = sext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: smulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-NEXT: ret + %1 = sext <4 x i16> %a to <4 x i32> + %2 = sext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: smulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: ret + %1 = sext <2 x i32> %a to <2 x i64> + %2 = sext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: smulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: smulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %1 = sext <1 x i64> %a to <1 x i128> + %2 = sext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, 
splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +} + +define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: umulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-NEXT: ret + %1 = zext <8 x i8> %a to <8 x i16> + %2 = zext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: umulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-NEXT: ret + %1 = zext <4 x i16> %a to <4 x i32> + %2 = zext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: umulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: ret + %1 = zext <2 x i32> %a to <2 x i64> + %2 = zext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @umulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: umulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %1 = zext <1 x i64> %a to <1 x i128> + %2 = zext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +} + diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll 
b/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll index bcf76d5b13d62..d7534712b53a0 100644 --- a/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s ; ; SMULH ; -define @smulh_i8( %a, %b) #0 { +define @smulh_i8( %a, %b) { ; CHECK-LABEL: smulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.b, z0.b, z1.b @@ -18,7 +18,7 @@ define @smulh_i8( %a, %b ret %tr } -define @smulh_i16( %a, %b) #0 { +define @smulh_i16( %a, %b) { ; CHECK-LABEL: smulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.h, z0.h, z1.h @@ -31,7 +31,7 @@ define @smulh_i16( %a, % ret %tr } -define @smulh_i32( %a, %b) #0 { +define @smulh_i32( %a, %b) { ; CHECK-LABEL: smulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.s, z0.s, z1.s @@ -44,7 +44,7 @@ define @smulh_i32( %a, % ret %tr } -define @smulh_i64( %a, %b) #0 { +define @smulh_i64( %a, %b) { ; CHECK-LABEL: smulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.d, z0.d, z1.d @@ -61,7 +61,7 @@ define @smulh_i64( %a, % ; UMULH ; -define @umulh_i8( %a, %b) #0 { +define @umulh_i8( %a, %b) { ; CHECK-LABEL: umulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.b, z0.b, z1.b @@ -74,7 +74,7 @@ define @umulh_i8( %a, %b ret %tr } -define @umulh_i16( %a, %b) #0 { +define @umulh_i16( %a, %b) { ; CHECK-LABEL: umulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.h, z0.h, z1.h @@ -87,7 +87,7 @@ define @umulh_i16( %a, % ret %tr } -define @umulh_i32( %a, %b) #0 { +define @umulh_i32( %a, %b) { ; CHECK-LABEL: umulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.s, z0.s, z1.s @@ -100,7 +100,7 @@ define @umulh_i32( %a, % ret %tr } -define @umulh_i64( %a, %b) #0 { +define @umulh_i64( %a, %b) { ; CHECK-LABEL: umulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.d, z0.d, z1.d @@ -113,4 +113,261 @@ define @umulh_i64( %a, % 
ret %tr } -attributes #0 = { "target-features"="+sve2" } + +; Fixed-length 128bits + +define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: smulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %1 = sext <16 x i8> %a to <16 x i16> + %2 = sext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: smulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret + %1 = sext <8 x i16> %a to <8 x i32> + %2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: smulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret + %1 = sext <4 x i32> %a to <4 x i64> + %2 = sext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: smulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: smulh x10, x10, x11 +; CHECK-NEXT: smulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret + %1 = sext <2 x i64> %a to <2 x i128> + %2 = sext <2 x i64> %b to <2 x i128> + %mul = mul <2 
x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + +define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: umulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %1 = zext <16 x i8> %a to <16 x i16> + %2 = zext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: umulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret + %1 = zext <8 x i16> %a to <8 x i32> + %2 = zext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: umulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret + %1 = zext <4 x i32> %a to <4 x i64> + %2 = zext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: umulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: umulh x10, x10, x11 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret + %1 = zext <2 x i64> %a to <2 x i128> + %2 = 
zext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + + + +; Fixed-length 64bits + +define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: smulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-NEXT: ret + %1 = sext <8 x i8> %a to <8 x i16> + %2 = sext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: smulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-NEXT: ret + %1 = sext <4 x i16> %a to <4 x i32> + %2 = sext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: smulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: ret + %1 = sext <2 x i32> %a to <2 x i64> + %2 = sext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: smulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: smulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %1 = sext <1 x i64> %a to <1 x i128> + %2 = sext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr 
to <1 x i64> + ret <1 x i64> %tr +} + +define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: umulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-NEXT: ret + %1 = zext <8 x i8> %a to <8 x i16> + %2 = zext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: umulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-NEXT: ret + %1 = zext <4 x i16> %a to <4 x i32> + %2 = zext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: umulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: ret + %1 = zext <2 x i32> %a to <2 x i64> + %2 = zext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @umulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: umulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %1 = zext <1 x i64> %a to <1 x i128> + %2 = zext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll 
index 405861d791169..9dfd0a47d1e1e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -10,41 +10,75 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inre ; GFX9: ; %bb.0: ; GFX9-NEXT: s_and_b32 s0, s4, 3 ; GFX9-NEXT: s_lshl_b32 s0, s0, 4 -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_and_b32 s0, s4, 3 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4 -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s0, s2, s0 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_and_b32 s0, s4, 3 -; GFX7-NEXT: s_lshl_b32 s0, s0, 4 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_and_b32 s2, s4, 3 +; GFX7-NEXT: s_lshl_b32 s4, s2, 4 +; GFX7-NEXT: s_mov_b32 s5, 0 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, s5 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], 
s[0:3], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_and_b32 s0, s4, 3 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4 -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_and_b32 s0, s4, 3 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_lshl_b32 s0, s0, 4 -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], s0 offset:0x0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 %idx @@ -281,22 +315,63 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre } define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(ptr addrspace(4) inreg %ptr) { -; GCN-LABEL: extractelement_sgpr_v4i128_idx0: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: ; 
return to shader part epilog +; GFX9-LABEL: extractelement_sgpr_v4i128_idx0: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_sgpr_v4i128_idx0: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_sgpr_v4i128_idx0: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 +; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_idx0: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_idx0: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 0 @@ -306,32 +381,63 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(ptr addrspace(4) inreg %p define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x10 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x10 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s0, s2, 16 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x4 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_load_dwordx4 
v[0:3], off, s[0:3], 0 offset:16 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x10 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x10 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] offset:16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 1 @@ -341,32 +447,63 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(ptr addrspace(4) inreg %p define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x20 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, 
v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x20 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s0, s2, 32 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x8 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:32 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x20 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:32 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x20 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] offset:32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; 
GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 2 @@ -376,32 +513,63 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(ptr addrspace(4) inreg %p define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x30 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:48 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x30 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s0, s2, 48 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0xc -; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:48 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: 
v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x30 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:48 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x30 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] offset:48 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 3 @@ -585,3 +753,5 @@ define i128 @extractelement_vgpr_v4i128_idx3(ptr addrspace(1) %ptr) { %element = extractelement <4 x i128> %vector, i32 3 ret i128 %element } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll index 9539ec465e02f..91ee7642790fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll @@ -11,28 +11,40 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr ; GFX8V4-LABEL: addrspacecast: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x40 ; GFX8V4-NEXT: s_add_i32 s12, s12, s17 ; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GFX8V4-NEXT: s_add_u32 s2, s6, 0x44 +; GFX8V4-NEXT: s_addc_u32 s3, s7, 0 +; GFX8V4-NEXT: v_mov_b32_e32 v0, s2 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_mov_b32 s4, s0 -; GFX8V4-NEXT: s_mov_b32 s5, s3 ; GFX8V4-NEXT: s_cmp_lg_u32 s0, -1 -; GFX8V4-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 -; GFX8V4-NEXT: s_mov_b32 s6, s1 -; GFX8V4-NEXT: s_mov_b32 s7, s2 +; GFX8V4-NEXT: v_mov_b32_e32 v1, s3 +; GFX8V4-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8V4-NEXT: s_and_b32 s4, 1, s2 +; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GFX8V4-NEXT: s_add_u32 s2, s6, 64 +; GFX8V4-NEXT: flat_load_dword v3, v[0:1] +; GFX8V4-NEXT: s_addc_u32 s3, s7, 0 +; GFX8V4-NEXT: v_mov_b32_e32 v0, s2 +; GFX8V4-NEXT: v_mov_b32_e32 v1, s3 +; GFX8V4-NEXT: flat_load_dword v4, v[0:1] ; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1 -; GFX8V4-NEXT: v_mov_b32_e32 v0, s4 -; GFX8V4-NEXT: s_cselect_b64 s[0:1], s[6:7], 0 -; GFX8V4-NEXT: v_mov_b32_e32 v2, 1 -; GFX8V4-NEXT: v_mov_b32_e32 v1, s5 -; GFX8V4-NEXT: flat_store_dword v[0:1], v2 -; GFX8V4-NEXT: s_waitcnt vmcnt(0) ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V4-NEXT: v_mov_b32_e32 v2, 2 +; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8V4-NEXT: 
s_and_b32 s0, 1, s0 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V4-NEXT: flat_store_dword v[0:1], v2 +; GFX8V4-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8V4-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; GFX8V4-NEXT: v_mov_b32_e32 v5, 1 +; GFX8V4-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX8V4-NEXT: v_cndmask_b32_e64 v2, 0, v1, s[0:1] +; GFX8V4-NEXT: s_waitcnt vmcnt(1) +; GFX8V4-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX8V4-NEXT: flat_store_dword v[0:1], v5 +; GFX8V4-NEXT: s_waitcnt vmcnt(0) +; GFX8V4-NEXT: v_mov_b32_e32 v0, 2 +; GFX8V4-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; GFX8V4-NEXT: flat_store_dword v[2:3], v0 ; GFX8V4-NEXT: s_waitcnt vmcnt(0) ; GFX8V4-NEXT: s_endpgm ; @@ -124,13 +136,15 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 { ; GFX8V4-LABEL: llvm_amdgcn_is_shared: ; GFX8V4: ; %bb.0: -; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V4-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x40 -; GFX8V4-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8V4-NEXT: s_add_u32 s0, s6, 64 +; GFX8V4-NEXT: s_addc_u32 s1, s7, 0 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0 +; GFX8V4-NEXT: v_mov_b32_e32 v1, s1 +; GFX8V4-NEXT: flat_load_dword v0, v[0:1] +; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; GFX8V4-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8V4-NEXT: v_cmp_eq_u32_e32 vcc, s1, v0 +; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX8V4-NEXT: flat_store_dword v[0:1], v0 ; GFX8V4-NEXT: s_waitcnt vmcnt(0) ; GFX8V4-NEXT: s_endpgm @@ -180,13 +194,15 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 { define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 { ; GFX8V4-LABEL: llvm_amdgcn_is_private: ; GFX8V4: ; %bb.0: -; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V4-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x44 -; GFX8V4-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8V4-NEXT: s_add_u32 s0, s6, 0x44 +; GFX8V4-NEXT: s_addc_u32 s1, s7, 0 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0 +; GFX8V4-NEXT: v_mov_b32_e32 v1, s1 +; GFX8V4-NEXT: flat_load_dword v0, v[0:1] +; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; GFX8V4-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8V4-NEXT: v_cmp_eq_u32_e32 vcc, s1, v0 +; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX8V4-NEXT: flat_store_dword v[0:1], v0 ; GFX8V4-NEXT: s_waitcnt vmcnt(0) ; GFX8V4-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir index 999ea42910d92..e35927e8bf00d 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir @@ -1,7 +1,9 @@ -# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s -# CHECK: Illegal virtual register for instruction -# CHECK: Expected a VGPR_32 register, but got a AGPR_32 register - +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s | FileCheck %s +# CHECK: bb.1: +# CHECK: dead %{{[0-9]+}}:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, 4, 4, %{{[0-9]+}}, %[[REG:[0-9]+]], 4, 0, implicit $mode, implicit $exec +# CHECK: %{{[0-9]+}}:agpr_32 = IMPLICIT_DEF +# CHECK: %[[REG]]:vgpr_32 = COPY %{{[0-9]+}} + # Test for issue in amdgpu-rewrite-agpr-copy-mfma, which reassigns scale operand # in vgpr_32 register to agpr_32, not permitted by instruction format. 
--- diff --git a/llvm/test/CodeGen/SPIRV/zero-length-array.ll b/llvm/test/CodeGen/SPIRV/zero-length-array.ll index 5fd94d25dfd87..cb34529ebfecd 100644 --- a/llvm/test/CodeGen/SPIRV/zero-length-array.ll +++ b/llvm/test/CodeGen/SPIRV/zero-length-array.ll @@ -1,7 +1,9 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan-compute < %s | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; Nothing is generated, but compilation doesn't crash. +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown < %s 2>&1 | FileCheck -check-prefix=CHECK-ERR %s + +; For compute, nothing is generated, but compilation doesn't crash. ; CHECK: OpName %[[#FOO:]] "foo" ; CHECK: OpName %[[#RTM:]] "reg2mem alloca point" ; CHECK: %[[#INT:]] = OpTypeInt 32 0 @@ -11,6 +13,10 @@ ; CHECK-NEXT: OpReturn ; CHECK-NEXT: OpFunctionEnd + +; For non-compute, error. 
+; CHECK-ERR: LLVM ERROR: Runtime arrays are not allowed in non-shader SPIR-V modules + define spir_func void @foo() { entry: %i = alloca [0 x i32], align 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll index d076cb00ad7e0..706a7c34c3df5 100644 --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -66,9 +66,8 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) { ; CHECK-NEXT: cset r6, ne ; CHECK-NEXT: strb r6, [r5] ; CHECK-NEXT: add.w r2, r2, #792 -; CHECK-NEXT: ldrb r6, [r3] +; CHECK-NEXT: ldrb r6, [r3], #2 ; CHECK-NEXT: adds r4, #8 -; CHECK-NEXT: adds r3, #2 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: ite ne ; CHECK-NEXT: sxthne r6, r1 @@ -101,8 +100,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) { ; CHECK-NEXT: cset r6, ne ; CHECK-NEXT: adds r4, #8 ; CHECK-NEXT: strb r6, [r5] -; CHECK-NEXT: ldrb r6, [r3] -; CHECK-NEXT: adds r3, #2 +; CHECK-NEXT: ldrb r6, [r3], #2 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: ite ne ; CHECK-NEXT: sxthne r6, r1 @@ -134,8 +132,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) { ; CHECK-NEXT: cset r4, ne ; CHECK-NEXT: add.w r11, r11, #8 ; CHECK-NEXT: strb r4, [r5] -; CHECK-NEXT: ldrb r4, [r3] -; CHECK-NEXT: adds r3, #2 +; CHECK-NEXT: ldrb r4, [r3], #2 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: ite ne ; CHECK-NEXT: sxthne r4, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll index d19844c683a8f..9e42f3984c24d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll @@ -240,4 +240,93 @@ entry: ret <8 x half> %0 } -attributes #0 = { strictfp } \ No newline at end of file + +define arm_aapcs_vfpcc <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vminnmq_f16: 
+; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vminnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vmaxnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vmaxnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vminnmaq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vminnmaq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) + %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b) + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x half> %1) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vminnmaq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vminnmaq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b) + %2 = tail call 
<4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x float> %1) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmaxnmaq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vmaxnmaq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) + %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b) + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x half> %1) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmaxnmaq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vmaxnmaq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b) + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x float> %1) + ret <4 x float> %2 +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll index 368f30fd5d7ed..8f90fa68e8fbd 100644 --- a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll +++ b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll @@ -46,6 +46,21 @@ define i32 @sra_i32(i32 %v, i32 %x) { ret i32 %a } +define i64 @sra_i64_zext(i64 %v, i32 %x) { +; CHECK-LABEL: sra_i64_zext: +; CHECK: .functype sra_i64_zext (i64, i32) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.extend_i32_u +; CHECK-NEXT: i64.shr_s +; CHECK-NEXT: # fallthrough-return + %m = and i32 %x, 63 + %z = zext i32 %m to i64 + %a = ashr i64 %v, %z + ret i64 %a +} + define i32 @srl_i32(i32 %v, i32 %x) { ; CHECK-LABEL: srl_i32: ; CHECK: .functype srl_i32 (i32, i32) -> (i32) @@ -59,6 +74,21 @@ define i32 @srl_i32(i32 %v, i32 %x) { ret i32 %a } +define i64 @srl_i64_zext(i64 %v, i32 %x) { +; CHECK-LABEL: srl_i64_zext: +; 
CHECK: .functype srl_i64_zext (i64, i32) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.extend_i32_u +; CHECK-NEXT: i64.shr_u +; CHECK-NEXT: # fallthrough-return + %m = and i32 %x, 63 + %z = zext i32 %m to i64 + %a = lshr i64 %v, %z + ret i64 %a +} + define i64 @shl_i64(i64 %v, i64 %x) { ; CHECK-LABEL: shl_i64: ; CHECK: .functype shl_i64 (i64, i64) -> (i64) diff --git a/llvm/test/CodeGen/X86/combine-fcmp.ll b/llvm/test/CodeGen/X86/combine-fcmp.ll new file mode 100644 index 0000000000000..f2666f69949b7 --- /dev/null +++ b/llvm/test/CodeGen/X86/combine-fcmp.ll @@ -0,0 +1,330 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX1OR2,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX1OR2,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512 + +define i4 @concat_fcmp_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) { +; SSE-LABEL: concat_fcmp_v4f64_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: xorpd %xmm2, %xmm2 +; SSE-NEXT: xorpd %xmm3, %xmm3 +; SSE-NEXT: cmpltpd %xmm0, %xmm3 +; SSE-NEXT: cmpltpd %xmm1, %xmm2 +; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm2[0,2] +; SSE-NEXT: movmskps %xmm3, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v4f64_v2f64: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm2, %xmm0 +; AVX1OR2-NEXT: vcmpltpd %xmm1, %xmm2, %xmm1 +; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX1OR2-NEXT: vmovmskps %xmm0, %eax +; AVX1OR2-NEXT: # kill: 
def $al killed $al killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v4f64_v2f64: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vcmpltpd %xmm0, %xmm2, %k0 +; AVX512-NEXT: vcmpltpd %xmm1, %xmm2, %k1 +; AVX512-NEXT: kshiftlb $2, %k1, %k1 +; AVX512-NEXT: korw %k1, %k0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq + %v0 = fcmp ogt <2 x double> %a0, zeroinitializer + %v1 = fcmp ogt <2 x double> %a1, zeroinitializer + %v = shufflevector <2 x i1> %v0, <2 x i1> %v1, <4 x i32> + %r = bitcast <4 x i1> %v to i4 + ret i4 %r +} + +define i8 @concat_fcmp_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) { +; SSE-LABEL: concat_fcmp_v8f32_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: xorps %xmm2, %xmm2 +; SSE-NEXT: cmpeqps %xmm2, %xmm0 +; SSE-NEXT: cmpeqps %xmm2, %xmm1 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v8f32_v4f32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX1OR2-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpeqps %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v8f32_v4f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vcmpeqps %ymm1, %ymm0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp oeq <4 x float> %a0, zeroinitializer + %v1 = fcmp oeq <4 x float> %a1, zeroinitializer + %v = shufflevector <4 x i1> %v0, <4 x i1> %v1, 
<8 x i32> + %r = bitcast <8 x i1> %v to i8 + ret i8 %r +} + +define i8 @concat_fcmp_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) { +; SSE-LABEL: concat_fcmp_v8f64_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: xorpd %xmm4, %xmm4 +; SSE-NEXT: cmpltpd %xmm4, %xmm0 +; SSE-NEXT: cmpltpd %xmm4, %xmm1 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: cmpltpd %xmm4, %xmm2 +; SSE-NEXT: cmpltpd %xmm4, %xmm3 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm0, %xmm0 +; SSE-NEXT: packssdw %xmm2, %xmm2 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v8f64_v2f64: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorpd %xmm4, %xmm4, %xmm4 +; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm2, %xmm1 +; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm3, %xmm2 +; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,3] +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v8f64_v2f64: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vcmpltpd %zmm1, %zmm0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp olt 
<2 x double> %a0, zeroinitializer + %v1 = fcmp olt <2 x double> %a1, zeroinitializer + %v2 = fcmp olt <2 x double> %a2, zeroinitializer + %v3 = fcmp olt <2 x double> %a3, zeroinitializer + %v01 = shufflevector <2 x i1> %v0, <2 x i1> %v1, <4 x i32> + %v23 = shufflevector <2 x i1> %v2, <2 x i1> %v3, <4 x i32> + %v = shufflevector <4 x i1> %v01, <4 x i1> %v23, <8 x i32> + %r = bitcast <8 x i1> %v to i8 + ret i8 %r +} + +define i16 @concat_fcmp_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) { +; SSE-LABEL: concat_fcmp_v16f32_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: xorps %xmm4, %xmm4 +; SSE-NEXT: xorps %xmm5, %xmm5 +; SSE-NEXT: cmpleps %xmm0, %xmm5 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cmpleps %xmm1, %xmm0 +; SSE-NEXT: packssdw %xmm0, %xmm5 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cmpleps %xmm2, %xmm0 +; SSE-NEXT: cmpleps %xmm3, %xmm4 +; SSE-NEXT: packssdw %xmm4, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm5 +; SSE-NEXT: pmovmskb %xmm5, %eax +; SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v16f32_v4f32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorps %xmm4, %xmm4, %xmm4 +; AVX1OR2-NEXT: vcmpleps %xmm0, %xmm4, %xmm0 +; AVX1OR2-NEXT: vcmpleps %xmm1, %xmm4, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpleps %xmm2, %xmm4, %xmm1 +; AVX1OR2-NEXT: vcmpleps %xmm3, %xmm4, %xmm2 +; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $ax killed $ax killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v16f32_v4f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; 
AVX512-NEXT: vcmpleps %zmm0, %zmm1, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp oge <4 x float> %a0, zeroinitializer + %v1 = fcmp oge <4 x float> %a1, zeroinitializer + %v2 = fcmp oge <4 x float> %a2, zeroinitializer + %v3 = fcmp oge <4 x float> %a3, zeroinitializer + %v01 = shufflevector <4 x i1> %v0, <4 x i1> %v1, <8 x i32> + %v23 = shufflevector <4 x i1> %v2, <4 x i1> %v3, <8 x i32> + %v = shufflevector <8 x i1> %v01, <8 x i1> %v23, <16 x i32> + %r = bitcast <16 x i1> %v to i16 + ret i16 %r +} + +define i8 @concat_fcmp_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1) { +; SSE-LABEL: concat_fcmp_v8f64_v4f64: +; SSE: # %bb.0: +; SSE-NEXT: xorpd %xmm4, %xmm4 +; SSE-NEXT: movapd %xmm1, %xmm5 +; SSE-NEXT: cmpneqpd %xmm4, %xmm5 +; SSE-NEXT: cmpordpd %xmm4, %xmm1 +; SSE-NEXT: andpd %xmm5, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE-NEXT: movapd %xmm0, %xmm5 +; SSE-NEXT: cmpneqpd %xmm4, %xmm5 +; SSE-NEXT: cmpordpd %xmm4, %xmm0 +; SSE-NEXT: andpd %xmm5, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: movapd %xmm3, %xmm1 +; SSE-NEXT: cmpneqpd %xmm4, %xmm1 +; SSE-NEXT: cmpordpd %xmm4, %xmm3 +; SSE-NEXT: andpd %xmm1, %xmm3 +; SSE-NEXT: movapd %xmm2, %xmm1 +; SSE-NEXT: cmpneqpd %xmm4, %xmm1 +; SSE-NEXT: cmpordpd %xmm4, %xmm2 +; SSE-NEXT: andpd %xmm1, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm2, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1-LABEL: concat_fcmp_v8f64_v4f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; 
AVX1-NEXT: vcmpneq_oqpd %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] +; AVX1-NEXT: vcmpneq_oqpd %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: # kill: def $al killed $al killed $eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: concat_fcmp_v8f64_v4f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vcmpneq_oqpd %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] +; AVX2-NEXT: vcmpneq_oqpd %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v8f64_v4f64: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp one <4 x double> %a0, zeroinitializer + %v1 = fcmp one <4 x double> %a1, zeroinitializer + %v = shufflevector <4 x i1> %v0, <4 x i1> %v1, <8 x i32> + %r = bitcast <8 x i1> %v to i8 + ret i8 %r +} + +define i16 @concat_fcmp_v16f32_v8f32(<8 x float> %a0, <8 x float> %a1) { +; SSE-LABEL: concat_fcmp_v16f32_v8f32: +; SSE: # %bb.0: +; SSE-NEXT: xorps %xmm4, %xmm4 +; SSE-NEXT: 
cmpleps %xmm4, %xmm1 +; SSE-NEXT: cmpleps %xmm4, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: cmpleps %xmm4, %xmm3 +; SSE-NEXT: cmpleps %xmm4, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v16f32_v8f32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX1OR2-NEXT: vcmpleps %ymm2, %ymm0, %ymm0 +; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1OR2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpleps %ymm2, %ymm1, %ymm1 +; AVX1OR2-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $ax killed $ax killed $eax +; AVX1OR2-NEXT: vzeroupper +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v16f32_v8f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vcmpleps %zmm1, %zmm0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp ole <8 x float> %a0, zeroinitializer + %v1 = fcmp ole <8 x float> %a1, zeroinitializer + %v = shufflevector <8 x i1> %v0, <8 x i1> %v1, <16 x i32> + %r = bitcast <16 x i1> %v to i16 + ret i16 %r +} diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index a9ebdaa397015..d82dc412f5e36 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -2667,15 +2667,10 @@ define [4 x float] @constant_aggregate_zero() { } define @scalable_splat_pnorm() { -; CHECK-CV: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-CV-LABEL: 
define noundef @scalable_splat_pnorm -; CHECK-CV-SAME: () #[[ATTR3]] { -; CHECK-CV-NEXT: ret splat (float 1.000000e+00) -; -; CHECK-CI: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-CI-LABEL: define noundef nofpclass(nan inf zero sub nnorm) @scalable_splat_pnorm -; CHECK-CI-SAME: () #[[ATTR3]] { -; CHECK-CI-NEXT: ret splat (float 1.000000e+00) +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define noundef nofpclass(nan inf zero sub nnorm) @scalable_splat_pnorm +; CHECK-SAME: () #[[ATTR3]] { +; CHECK-NEXT: ret splat (float 1.000000e+00) ; ret splat (float 1.0) } @@ -2689,6 +2684,19 @@ define @scalable_splat_zero() { ret zeroinitializer } +define @scalable_splat_nnan(float nofpclass(nan) %x) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan) @scalable_splat_nnan +; CHECK-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[HEAD:%.*]] = insertelement poison, float [[X]], i32 0 +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector [[HEAD]], poison, zeroinitializer +; CHECK-NEXT: ret [[SPLAT]] +; + %head = insertelement poison, float %x, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + ret %splat +} + ; Verify we do not derive 'nofpclass(inf zero sub norm)' for the argument __x. ; See https://github.com/llvm/llvm-project/issues/78507 @@ -2989,5 +2997,7 @@ attributes #5 = { "denormal-fp-math"="ieee,positive-zero" } ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; CGSCC-CI: {{.*}} ; CGSCC-CV: {{.*}} +; CHECK-CI: {{.*}} +; CHECK-CV: {{.*}} ; TUNIT-CI: {{.*}} ; TUNIT-CV: {{.*}} diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll index 257816650017a..1a6400997f080 100644 --- a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll @@ -97,7 +97,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32 -; CHECK-NEXT: br i1 %arg, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; CHECK: for.cond1.preheader.us.preheader: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[CONV]], i32 1) ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll index 2003b1a72206d..acba88ef5a54d 100644 --- a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll +++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll @@ -4,13 +4,12 @@ define i32 @remove_loop(i32 %size) #0 { ; CHECK-V8M-LABEL: @remove_loop( -; CHECK-V8M-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-V8M-NEXT: entry: -; CHECK-V8M-NEXT: br label %[[WHILE_COND:.*]] +; CHECK-V8M-NEXT: br label [[WHILE_COND:%.*]] ; CHECK-V8M: while.cond: -; CHECK-V8M-NEXT: br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]] +; CHECK-V8M-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK-V8M: while.end: -; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31 +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 ; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 
[[SIZE]], i32 31) ; CHECK-V8M-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] ; CHECK-V8M-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 @@ -19,13 +18,12 @@ define i32 @remove_loop(i32 %size) #0 { ; CHECK-V8M-NEXT: ret i32 [[TMP4]] ; ; CHECK-V8A-LABEL: @remove_loop( -; CHECK-V8A-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-V8A-NEXT: entry: -; CHECK-V8A-NEXT: br label %[[WHILE_COND:.*]] +; CHECK-V8A-NEXT: br label [[WHILE_COND:%.*]] ; CHECK-V8A: while.cond: -; CHECK-V8A-NEXT: br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]] +; CHECK-V8A-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK-V8A: while.end: -; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31 +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 ; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) ; CHECK-V8A-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] ; CHECK-V8A-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 diff --git a/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll b/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll index cc0f2587266a2..45bb66d1d7d80 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll @@ -16,7 +16,7 @@ declare void @use(i64 %x) define void @loop_0(ptr %a, i1 %arg) { ; CHECK-LABEL: @loop_0( ; CHECK-NEXT: Prologue: -; CHECK-NEXT: br i1 %arg, label [[B18_PREHEADER:%.*]], label [[B6:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[B18_PREHEADER:%.*]], label [[B6:%.*]] ; CHECK: B18.preheader: ; CHECK-NEXT: br label [[B18:%.*]] ; CHECK: B18: @@ -70,7 +70,7 @@ exit24: ; preds = %B18 define void @loop_0_dead(ptr %a, i1 %arg) { ; CHECK-LABEL: @loop_0_dead( ; CHECK-NEXT: Prologue: -; CHECK-NEXT: br i1 %arg, label [[B18_PREHEADER:%.*]], label [[B6:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[B18_PREHEADER:%.*]], label [[B6:%.*]] ; CHECK: B18.preheader: ; CHECK-NEXT: br label [[B18:%.*]] ; CHECK: B18: diff --git 
a/llvm/test/Transforms/IndVarSimplify/X86/pr59615.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr59615.ll index 17b7b9d40b07a..5ecb684b8b2f5 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/pr59615.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/pr59615.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[VAR:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8, !range [[RNG0:![0-9]+]], !invariant.load !1, !noundef !1 +; CHECK-NEXT: [[VAR:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8, !range [[RNG0:![0-9]+]], !invariant.load [[META1:![0-9]+]], !noundef [[META1]] ; CHECK-NEXT: [[VAR2:%.*]] = icmp eq i32 [[VAR]], 0 ; CHECK-NEXT: br i1 [[VAR2]], label [[BB18:%.*]], label [[BB19:%.*]] ; CHECK: bb3: @@ -16,7 +16,7 @@ define void @test() { ; CHECK: bb7: ; CHECK-NEXT: ret void ; CHECK: bb8: -; CHECK-NEXT: [[VAR9:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8, !range [[RNG0]], !invariant.load !1, !noundef !1 +; CHECK-NEXT: [[VAR9:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8, !range [[RNG0]], !invariant.load [[META1]], !noundef [[META1]] ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[VAR9]] to i64 ; CHECK-NEXT: [[VAR10:%.*]] = icmp ult i64 [[INDVARS_IV]], [[TMP0]] ; CHECK-NEXT: br i1 [[VAR10]], label [[BB12]], label [[BB11:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll b/llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll index 121eec75c1b3c..4502416a19477 100644 --- a/llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll +++ b/llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll @@ -51,7 +51,7 @@ bb2: ; preds = %bb2, %bb1 !8 = !DILocation(line: 1, column: 1, scope: !5) ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) -; CHECK: [[META1]] = !DIFile(filename: "llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll", directory: {{.*}}) +; CHECK: [[META1]] = !DIFile(filename: "{{.*}}debugloc-rem-subst.ll", directory: {{.*}}) ; CHECK: [[DBG5]] = distinct !DISubprogram(name: "widget", linkageName: "widget", scope: null, file: [[META1]], line: 1, type: [[META6:![0-9]+]], scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]]) ; CHECK: [[META6]] = !DISubroutineType(types: [[META7:![0-9]+]]) ; CHECK: [[META7]] = !{} diff --git a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll index b4cd98cd234f0..6a809fe45d660 100644 --- a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll +++ b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll @@ -211,7 +211,7 @@ define void @test6(i32 %m, ptr %p) nounwind uwtable { ; CHECK-NEXT: [[ADD]] = add i32 [[A_05]], [[M:%.*]] ; CHECK-NEXT: [[SOFT_USE:%.*]] = add i32 [[ADD]], 123 ; CHECK-NEXT: [[PIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[ADD]] -; CHECK-NEXT: store i32 [[SOFT_USE]], ptr [[PIDX]] +; CHECK-NEXT: store i32 [[SOFT_USE]], ptr [[PIDX]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll index b24650830778f..488aed2ba0211 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll @@ -221,6 +221,220 @@ exit: } define void @many_exits([100 x i64] %len) { +; CHECK-LABEL: @many_exits( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN1:%.*]] = 
extractvalue [100 x i64] [[LEN:%.*]], 1 +; CHECK-NEXT: [[LEN2:%.*]] = extractvalue [100 x i64] [[LEN]], 2 +; CHECK-NEXT: [[LEN3:%.*]] = extractvalue [100 x i64] [[LEN]], 3 +; CHECK-NEXT: [[LEN4:%.*]] = extractvalue [100 x i64] [[LEN]], 4 +; CHECK-NEXT: [[LEN5:%.*]] = extractvalue [100 x i64] [[LEN]], 5 +; CHECK-NEXT: [[LEN6:%.*]] = extractvalue [100 x i64] [[LEN]], 6 +; CHECK-NEXT: [[LEN7:%.*]] = extractvalue [100 x i64] [[LEN]], 7 +; CHECK-NEXT: [[LEN8:%.*]] = extractvalue [100 x i64] [[LEN]], 8 +; CHECK-NEXT: [[LEN9:%.*]] = extractvalue [100 x i64] [[LEN]], 9 +; CHECK-NEXT: [[LEN10:%.*]] = extractvalue [100 x i64] [[LEN]], 10 +; CHECK-NEXT: [[LEN11:%.*]] = extractvalue [100 x i64] [[LEN]], 11 +; CHECK-NEXT: [[LEN12:%.*]] = extractvalue [100 x i64] [[LEN]], 12 +; CHECK-NEXT: [[LEN13:%.*]] = extractvalue [100 x i64] [[LEN]], 13 +; CHECK-NEXT: [[LEN14:%.*]] = extractvalue [100 x i64] [[LEN]], 14 +; CHECK-NEXT: [[LEN15:%.*]] = extractvalue [100 x i64] [[LEN]], 15 +; CHECK-NEXT: [[LEN16:%.*]] = extractvalue [100 x i64] [[LEN]], 16 +; CHECK-NEXT: [[LEN17:%.*]] = extractvalue [100 x i64] [[LEN]], 17 +; CHECK-NEXT: [[LEN18:%.*]] = extractvalue [100 x i64] [[LEN]], 18 +; CHECK-NEXT: [[LEN19:%.*]] = extractvalue [100 x i64] [[LEN]], 19 +; CHECK-NEXT: [[LEN20:%.*]] = extractvalue [100 x i64] [[LEN]], 20 +; CHECK-NEXT: [[LEN21:%.*]] = extractvalue [100 x i64] [[LEN]], 21 +; CHECK-NEXT: [[LEN22:%.*]] = extractvalue [100 x i64] [[LEN]], 22 +; CHECK-NEXT: [[LEN23:%.*]] = extractvalue [100 x i64] [[LEN]], 23 +; CHECK-NEXT: [[LEN24:%.*]] = extractvalue [100 x i64] [[LEN]], 24 +; CHECK-NEXT: [[LEN25:%.*]] = extractvalue [100 x i64] [[LEN]], 25 +; CHECK-NEXT: [[LEN26:%.*]] = extractvalue [100 x i64] [[LEN]], 26 +; CHECK-NEXT: [[LEN27:%.*]] = extractvalue [100 x i64] [[LEN]], 27 +; CHECK-NEXT: [[LEN28:%.*]] = extractvalue [100 x i64] [[LEN]], 28 +; CHECK-NEXT: [[LEN29:%.*]] = extractvalue [100 x i64] [[LEN]], 29 +; CHECK-NEXT: [[LEN30:%.*]] = extractvalue [100 x i64] [[LEN]], 30 +; 
CHECK-NEXT: [[LEN31:%.*]] = extractvalue [100 x i64] [[LEN]], 31 +; CHECK-NEXT: [[LEN32:%.*]] = extractvalue [100 x i64] [[LEN]], 32 +; CHECK-NEXT: [[LEN33:%.*]] = extractvalue [100 x i64] [[LEN]], 33 +; CHECK-NEXT: [[LEN34:%.*]] = extractvalue [100 x i64] [[LEN]], 34 +; CHECK-NEXT: [[LEN35:%.*]] = extractvalue [100 x i64] [[LEN]], 35 +; CHECK-NEXT: [[LEN36:%.*]] = extractvalue [100 x i64] [[LEN]], 36 +; CHECK-NEXT: [[LEN37:%.*]] = extractvalue [100 x i64] [[LEN]], 37 +; CHECK-NEXT: [[LEN38:%.*]] = extractvalue [100 x i64] [[LEN]], 38 +; CHECK-NEXT: [[LEN39:%.*]] = extractvalue [100 x i64] [[LEN]], 39 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LEN0:%.*]] = extractvalue [100 x i64] [[LEN]], 0 +; CHECK-NEXT: [[EARLY0:%.*]] = icmp eq i64 [[IV]], [[LEN0]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY0]], label [[EXIT:%.*]], label [[CONT0:%.*]] +; CHECK: cont0: +; CHECK-NEXT: [[EARLY1:%.*]] = icmp eq i64 [[IV]], [[LEN1]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY1]], label [[EXIT]], label [[CONT1:%.*]] +; CHECK: cont1: +; CHECK-NEXT: [[EARLY2:%.*]] = icmp eq i64 [[IV]], [[LEN2]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY2]], label [[EXIT]], label [[CONT2:%.*]] +; CHECK: cont2: +; CHECK-NEXT: [[EARLY3:%.*]] = icmp eq i64 [[IV]], [[LEN3]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY3]], label [[EXIT]], label [[CONT3:%.*]] +; CHECK: cont3: +; CHECK-NEXT: [[EARLY4:%.*]] = icmp eq i64 [[IV]], [[LEN4]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY4]], label [[EXIT]], label [[CONT4:%.*]] +; CHECK: cont4: +; CHECK-NEXT: [[EARLY5:%.*]] = icmp eq i64 [[IV]], [[LEN5]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY5]], label [[EXIT]], label [[CONT5:%.*]] +; CHECK: cont5: +; CHECK-NEXT: [[EARLY6:%.*]] = icmp eq i64 
[[IV]], [[LEN6]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY6]], label [[EXIT]], label [[CONT6:%.*]] +; CHECK: cont6: +; CHECK-NEXT: [[EARLY7:%.*]] = icmp eq i64 [[IV]], [[LEN7]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY7]], label [[EXIT]], label [[CONT7:%.*]] +; CHECK: cont7: +; CHECK-NEXT: [[EARLY8:%.*]] = icmp eq i64 [[IV]], [[LEN8]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY8]], label [[EXIT]], label [[CONT8:%.*]] +; CHECK: cont8: +; CHECK-NEXT: [[EARLY9:%.*]] = icmp eq i64 [[IV]], [[LEN9]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY9]], label [[EXIT]], label [[CONT9:%.*]] +; CHECK: cont9: +; CHECK-NEXT: [[EARLY10:%.*]] = icmp eq i64 [[IV]], [[LEN10]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY10]], label [[EXIT]], label [[CONT10:%.*]] +; CHECK: cont10: +; CHECK-NEXT: [[EARLY11:%.*]] = icmp eq i64 [[IV]], [[LEN11]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY11]], label [[EXIT]], label [[CONT11:%.*]] +; CHECK: cont11: +; CHECK-NEXT: [[EARLY12:%.*]] = icmp eq i64 [[IV]], [[LEN12]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY12]], label [[EXIT]], label [[CONT12:%.*]] +; CHECK: cont12: +; CHECK-NEXT: [[EARLY13:%.*]] = icmp eq i64 [[IV]], [[LEN13]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY13]], label [[EXIT]], label [[CONT13:%.*]] +; CHECK: cont13: +; CHECK-NEXT: [[EARLY14:%.*]] = icmp eq i64 [[IV]], [[LEN14]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY14]], label [[EXIT]], label [[CONT14:%.*]] +; CHECK: cont14: +; CHECK-NEXT: [[EARLY15:%.*]] = icmp eq i64 [[IV]], [[LEN15]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY15]], label [[EXIT]], label [[CONT15:%.*]] +; CHECK: cont15: +; CHECK-NEXT: [[EARLY16:%.*]] = icmp eq i64 [[IV]], [[LEN16]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY16]], label 
[[EXIT]], label [[CONT16:%.*]] +; CHECK: cont16: +; CHECK-NEXT: [[EARLY17:%.*]] = icmp eq i64 [[IV]], [[LEN17]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY17]], label [[EXIT]], label [[CONT17:%.*]] +; CHECK: cont17: +; CHECK-NEXT: [[EARLY18:%.*]] = icmp eq i64 [[IV]], [[LEN18]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY18]], label [[EXIT]], label [[CONT18:%.*]] +; CHECK: cont18: +; CHECK-NEXT: [[EARLY19:%.*]] = icmp eq i64 [[IV]], [[LEN19]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY19]], label [[EXIT]], label [[CONT19:%.*]] +; CHECK: cont19: +; CHECK-NEXT: [[EARLY20:%.*]] = icmp eq i64 [[IV]], [[LEN20]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY20]], label [[EXIT]], label [[CONT20:%.*]] +; CHECK: cont20: +; CHECK-NEXT: [[EARLY21:%.*]] = icmp eq i64 [[IV]], [[LEN21]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY21]], label [[EXIT]], label [[CONT21:%.*]] +; CHECK: cont21: +; CHECK-NEXT: [[EARLY22:%.*]] = icmp eq i64 [[IV]], [[LEN22]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY22]], label [[EXIT]], label [[CONT22:%.*]] +; CHECK: cont22: +; CHECK-NEXT: [[EARLY23:%.*]] = icmp eq i64 [[IV]], [[LEN23]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY23]], label [[EXIT]], label [[CONT23:%.*]] +; CHECK: cont23: +; CHECK-NEXT: [[EARLY24:%.*]] = icmp eq i64 [[IV]], [[LEN24]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY24]], label [[EXIT]], label [[CONT24:%.*]] +; CHECK: cont24: +; CHECK-NEXT: [[EARLY25:%.*]] = icmp eq i64 [[IV]], [[LEN25]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY25]], label [[EXIT]], label [[CONT25:%.*]] +; CHECK: cont25: +; CHECK-NEXT: [[EARLY26:%.*]] = icmp eq i64 [[IV]], [[LEN26]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY26]], label [[EXIT]], label [[CONT26:%.*]] +; CHECK: cont26: +; CHECK-NEXT: [[EARLY27:%.*]] 
= icmp eq i64 [[IV]], [[LEN27]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY27]], label [[EXIT]], label [[CONT27:%.*]] +; CHECK: cont27: +; CHECK-NEXT: [[EARLY28:%.*]] = icmp eq i64 [[IV]], [[LEN28]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY28]], label [[EXIT]], label [[CONT28:%.*]] +; CHECK: cont28: +; CHECK-NEXT: [[EARLY29:%.*]] = icmp eq i64 [[IV]], [[LEN29]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY29]], label [[EXIT]], label [[CONT29:%.*]] +; CHECK: cont29: +; CHECK-NEXT: [[EARLY30:%.*]] = icmp eq i64 [[IV]], [[LEN30]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY30]], label [[EXIT]], label [[CONT30:%.*]] +; CHECK: cont30: +; CHECK-NEXT: [[EARLY31:%.*]] = icmp eq i64 [[IV]], [[LEN31]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY31]], label [[EXIT]], label [[CONT31:%.*]] +; CHECK: cont31: +; CHECK-NEXT: [[EARLY32:%.*]] = icmp eq i64 [[IV]], [[LEN32]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY32]], label [[EXIT]], label [[CONT32:%.*]] +; CHECK: cont32: +; CHECK-NEXT: [[EARLY33:%.*]] = icmp eq i64 [[IV]], [[LEN33]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY33]], label [[EXIT]], label [[CONT33:%.*]] +; CHECK: cont33: +; CHECK-NEXT: [[EARLY34:%.*]] = icmp eq i64 [[IV]], [[LEN34]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY34]], label [[EXIT]], label [[CONT34:%.*]] +; CHECK: cont34: +; CHECK-NEXT: [[EARLY35:%.*]] = icmp eq i64 [[IV]], [[LEN35]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY35]], label [[EXIT]], label [[CONT35:%.*]] +; CHECK: cont35: +; CHECK-NEXT: [[EARLY36:%.*]] = icmp eq i64 [[IV]], [[LEN36]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY36]], label [[EXIT]], label [[CONT36:%.*]] +; CHECK: cont36: +; CHECK-NEXT: [[EARLY37:%.*]] = icmp eq i64 [[IV]], [[LEN37]] +; CHECK-NEXT: call void @side_effect() +; 
CHECK-NEXT: br i1 [[EARLY37]], label [[EXIT]], label [[CONT37:%.*]] +; CHECK: cont37: +; CHECK-NEXT: [[EARLY38:%.*]] = icmp eq i64 [[IV]], [[LEN38]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY38]], label [[EXIT]], label [[CONT38:%.*]] +; CHECK: cont38: +; CHECK-NEXT: [[EARLY39:%.*]] = icmp eq i64 [[IV]], [[LEN39]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY39]], label [[EXIT]], label [[CONT39:%.*]] +; CHECK: cont39: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[IV]], 999 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop loop: diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll index 9fcfc7c9b349a..dc0e49efb091f 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll @@ -13,7 +13,7 @@ define void @uadd_sat(ptr %p) { ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SAT1:%.*]] = add nuw nsw i32 [[I]], 1 -; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]] +; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[END:%.*]] @@ -42,7 +42,7 @@ define void @sadd_sat(ptr %p) { ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SAT1:%.*]] = add nuw nsw i32 [[I]], 1 -; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]] +; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp 
ne i32 [[I_INC]], 100 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[END:%.*]] @@ -71,7 +71,7 @@ define void @usub_sat(ptr %p) { ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SAT1:%.*]] = sub nuw nsw i32 [[I]], 1 -; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]] +; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[END:%.*]] @@ -100,7 +100,7 @@ define void @ssub_sat(ptr %p) { ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SAT1:%.*]] = sub nsw i32 [[I]], 1 -; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]] +; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[END:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll index a1c07b0a24638..1f8bf5fecb248 100644 --- a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll +++ b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll @@ -6,7 +6,7 @@ define i32 @test_01(ptr %p) { ; CHECK-LABEL: @test_01( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG0:!range !.*]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -45,7 +45,7 @@ exit: define i32 @test_01_neg(ptr %p) { ; CHECK-LABEL: @test_01_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG0]] +; CHECK-NEXT: [[LEN:%.*]] = load 
i32, ptr [[P:%.*]], align 4, !range [[RNG0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -86,7 +86,7 @@ exit: define i32 @test_02(ptr %p) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG1:!range !.*]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -125,7 +125,7 @@ exit: define i32 @test_02_neg(ptr %p) { ; CHECK-LABEL: @test_02_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG1]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -164,7 +164,7 @@ exit: define i32 @test_03(ptr %p) { ; CHECK-LABEL: @test_03( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG2:!range !.*]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG2:![0-9]+]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -202,7 +202,7 @@ exit: define i32 @test_04(ptr %p) { ; CHECK-LABEL: @test_04( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG2]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll 
index 1944a9c800355..5fe72ea0d4fea 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll @@ -230,8 +230,6 @@ exit: ; The control-flow before and after the load of qval shouldn't prevent postindex ; addressing from happening. -; FIXME: We choose postindex addressing, but the scevgep is placed in for.inc so -; during codegen we will fail to actually generate a postindex load. define void @middle_block_load(ptr %p, ptr %q, i64 %n) { ; CHECK-LABEL: define void @middle_block_load( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i64 [[N:%.*]]) { @@ -254,6 +252,7 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) { ; CHECK: [[IF_END]]: ; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[QVAL]], 0 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_THEN2:.*]], label %[[IF_ELSE2:.*]] ; CHECK: [[IF_THEN2]]: ; CHECK-NEXT: tail call void @otherfn1() @@ -263,7 +262,6 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) { ; CHECK-NEXT: br label %[[FOR_INC]] ; CHECK: [[FOR_INC]]: ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 -; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT:.*]], label %[[FOR_BODY]] ; CHECK: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll index de54852313456..7be3a94b90e46 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll @@ -265,8 +265,8 @@ define void @test4(i16 %c3) { ; CHECK-NEXT: br label [[EXITING_PROL:%.*]] ; CHECK: exiting.prol: ; CHECK-NEXT: switch i16 [[C3:%.*]], label 
[[DEFAULT_LOOPEXIT_LOOPEXIT1:%.*]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]] -; CHECK-NEXT: i16 95, label [[LATCH_PROL]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]] +; CHECK-NEXT: i16 95, label [[LATCH_PROL]] ; CHECK-NEXT: ] ; CHECK: latch.prol: ; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1 @@ -288,29 +288,29 @@ define void @test4(i16 %c3) { ; CHECK-NEXT: br label [[EXITING:%.*]] ; CHECK: exiting: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT:%.*]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]] -; CHECK-NEXT: i16 95, label [[LATCH:%.*]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: i16 95, label [[LATCH:%.*]] ; CHECK-NEXT: ] ; CHECK: latch: ; CHECK-NEXT: br label [[EXITING_1:%.*]] ; CHECK: exiting.1: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] -; CHECK-NEXT: i16 95, label [[LATCH_1:%.*]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_1:%.*]] ; CHECK-NEXT: ] ; CHECK: latch.1: ; CHECK-NEXT: br label [[EXITING_2:%.*]] ; CHECK: exiting.2: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] -; CHECK-NEXT: i16 95, label [[LATCH_2:%.*]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_2:%.*]] ; CHECK-NEXT: ] ; CHECK: latch.2: ; CHECK-NEXT: br label [[EXITING_3:%.*]] ; CHECK: exiting.3: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] -; CHECK-NEXT: i16 95, label [[LATCH_3]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_3]] ; CHECK-NEXT: ] ; CHECK: latch.3: ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4 diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll 
b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll index 9ee51cfbcb590..a3d2fcb5ab946 100644 --- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll +++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll @@ -638,18 +638,18 @@ define i32 @test6() #0 { ; CHECK: [[FOR_LATCH]]: ; CHECK-NEXT: br i1 false, label %[[FOR_OUTER]], label %[[FOR_END_UNR_LCSSA:.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[FOR_END_UNR_LCSSA]]: -; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] -; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 7, %[[FOR_LATCH]] ] -; CHECK-NEXT: [[P0_UNR_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] +; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] +; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH:%.*]] = phi i32 [ 7, %[[FOR_LATCH]] ] +; CHECK-NEXT: [[P0_UNR:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] ; CHECK-NEXT: br i1 true, label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END:.*]] ; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]: -; CHECK-NEXT: [[P0_UNR:%.*]] = phi i32 [ [[F_PROMOTED10]], %[[ENTRY]] ], [ [[P0_UNR_PH]], %[[FOR_END_UNR_LCSSA]] ] +; CHECK-NEXT: [[P0_EPIL_INIT:%.*]] = phi i32 [ [[F_PROMOTED10]], %[[ENTRY]] ], [ [[P0_UNR]], %[[FOR_END_UNR_LCSSA]] ] ; CHECK-NEXT: call void @llvm.assume(i1 true) ; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]] ; CHECK: [[FOR_OUTER_EPIL]]: ; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]] ; CHECK: [[FOR_INNER_EPIL]]: -; CHECK-NEXT: [[P1_EPIL:%.*]] = phi i32 [ [[P0_UNR]], %[[FOR_OUTER_EPIL]] ], [ 2, %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: [[P1_EPIL:%.*]] = phi i32 [ [[P0_EPIL_INIT]], %[[FOR_OUTER_EPIL]] ], [ 2, %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[INC_SINK8_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[INC_EPIL]] = add nuw nsw i32 [[INC_SINK8_EPIL]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp ne i32 [[INC_EPIL]], 7 @@ -658,8 +658,8 @@ define i32 @test6() #0 { ; CHECK-NEXT: [[DOTLCSSA_EPIL:%.*]] = phi i32 [ [[P1_EPIL]], 
%[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: br label %[[FOR_END]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], %[[FOR_LATCH_EPIL]] ] -; CHECK-NEXT: [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA]] ], [ 7, %[[FOR_LATCH_EPIL]] ] +; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], %[[FOR_LATCH_EPIL]] ] +; CHECK-NEXT: [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ 7, %[[FOR_LATCH_EPIL]] ] ; CHECK-NEXT: ret i32 0 ; entry: @@ -1324,9 +1324,9 @@ define signext i16 @test10(i32 %k) #0 { ; CHECK-NEXT: [[STOREMERGE_4_LCSSA_3:%.*]] = phi i64 [ [[STOREMERGE_4_3:%.*]], %[[FOR_INC21_3]] ] ; CHECK-NEXT: br i1 false, label %[[FOR_BODY]], label %[[FOR_END26_UNR_LCSSA:.*]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[FOR_END26_UNR_LCSSA]]: -; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ 0, %[[FOR_INC24]] ] -; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], %[[FOR_INC24]] ] -; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 0, %[[FOR_INC24]] ] +; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH:%.*]] = phi i64 [ 0, %[[FOR_INC24]] ] +; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], %[[FOR_INC24]] ] +; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH:%.*]] = phi i32 [ 0, %[[FOR_INC24]] ] ; CHECK-NEXT: br i1 true, label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END26:.*]] ; CHECK: [[FOR_BODY_EPIL_PREHEADER]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) @@ -1353,9 +1353,9 @@ define signext i16 @test10(i32 %k) #0 { ; CHECK-NEXT: [[STOREMERGE_4_LCSSA_EPIL:%.*]] = phi i64 [ [[STOREMERGE_4_EPIL]], %[[FOR_INC21_EPIL]] ] ; CHECK-NEXT: br label %[[FOR_END26]] ; CHECK: [[FOR_END26]]: -; CHECK-NEXT: [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH_PH]], 
%[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ] -; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], %[[FOR_INC24_EPIL]] ] -; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ] +; CHECK-NEXT: [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ] +; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], %[[FOR_INC24_EPIL]] ] +; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ] ; CHECK-NEXT: store i64 [[DEC_LCSSA_LCSSA]], ptr @g, align 8 ; CHECK-NEXT: ret i16 0 ; CHECK: [[FOR_BODY2_SPLIT2_1]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 21b21774d18cf..91c65ba8f6267 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -380,7 +380,7 @@ for.end: ret void } -define void @loop_with_freeze_and_conditional_srem(ptr %dst, ptr %keyinfo, ptr %invariant.ptr, i32 %divisor) #1 { +define void @loop_with_freeze_and_conditional_srem(ptr %dst, ptr %keyinfo, ptr %invariant.ptr, i32 %divisor) { ; COMMON-LABEL: define void @loop_with_freeze_and_conditional_srem( ; COMMON-SAME: ptr [[DST:%.*]], ptr [[KEYINFO:%.*]], ptr [[INVARIANT_PTR:%.*]], i32 [[DIVISOR:%.*]]) { ; COMMON-NEXT: [[ENTRY:.*]]: @@ -433,7 +433,165 @@ exit: ; preds = %loop.latch ret void } +define void @interleave_group(ptr %dst) #1 { +; COST1-LABEL: define void @interleave_group( +; COST1-SAME: ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] { 
+; COST1-NEXT: [[ITER_CHECK:.*:]] +; COST1-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; COST1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; COST1-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; COST1: [[VECTOR_PH]]: +; COST1-NEXT: br label %[[VECTOR_BODY:.*]] +; COST1: [[VECTOR_BODY]]: +; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; COST1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 16 +; COST1-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], 3 +; COST1-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 3 +; COST1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]] +; COST1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]] +; COST1-NEXT: store <48 x i8> zeroinitializer, ptr [[TMP3]], align 1 +; COST1-NEXT: store <48 x i8> zeroinitializer, ptr [[TMP4]], align 1 +; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; COST1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 +; COST1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; COST1: [[MIDDLE_BLOCK]]: +; COST1-NEXT: br i1 false, [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; COST1: [[VEC_EPILOG_ITER_CHECK]]: +; COST1-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4]] +; COST1: [[VEC_EPILOG_PH]]: +; COST1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; COST1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 +; COST1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; COST1-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], +; COST1-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; COST1: [[VEC_EPILOG_VECTOR_BODY]]: +; COST1-NEXT: [[INDEX1:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST1-NEXT: [[TMP6:%.*]] = mul <4 x i64> [[VEC_IND]], splat (i64 3) +; COST1-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 +; COST1-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 +; COST1-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 +; COST1-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; COST1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; COST1-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] +; COST1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]] +; COST1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]] +; COST1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 2 +; COST1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP12]], i64 2 +; COST1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 2 +; COST1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP14]], i64 2 +; COST1-NEXT: store i8 0, ptr [[TMP15]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP16]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP17]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP18]], align 1 +; COST1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP11]], i64 1 +; COST1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP12]], i64 1 +; COST1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP13]], i64 1 +; COST1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP14]], i64 1 +; COST1-NEXT: store i8 0, ptr [[TMP19]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP20]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP21]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP22]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP11]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP12]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP13]], align 
1 +; COST1-NEXT: store i8 0, ptr [[TMP14]], align 1 +; COST1-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 +; COST1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; COST1-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100 +; COST1-NEXT: br i1 [[TMP23]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; COST1: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; COST1-NEXT: br i1 false, [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; COST1: [[VEC_EPILOG_SCALAR_PH]]: +; +; COST10-LABEL: define void @interleave_group( +; COST10-SAME: ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] { +; COST10-NEXT: [[ITER_CHECK:.*:]] +; COST10-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; COST10: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; COST10-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; COST10: [[VECTOR_PH]]: +; COST10-NEXT: br label %[[VECTOR_BODY:.*]] +; COST10: [[VECTOR_BODY]]: +; COST10-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; COST10-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 +; COST10-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]] +; COST10-NEXT: store <48 x i8> zeroinitializer, ptr [[TMP1]], align 1 +; COST10-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; COST10-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 +; COST10-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; COST10: [[MIDDLE_BLOCK]]: +; COST10-NEXT: br i1 false, [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; COST10: [[VEC_EPILOG_ITER_CHECK]]: +; COST10-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4]] +; COST10: [[VEC_EPILOG_PH]]: +; COST10-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; COST10-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = 
insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 +; COST10-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; COST10-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], +; COST10-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; COST10: [[VEC_EPILOG_VECTOR_BODY]]: +; COST10-NEXT: [[INDEX1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST10-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST10-NEXT: [[TMP3:%.*]] = mul <4 x i64> [[VEC_IND]], splat (i64 3) +; COST10-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 +; COST10-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 +; COST10-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 +; COST10-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 +; COST10-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] +; COST10-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] +; COST10-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] +; COST10-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; COST10-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 2 +; COST10-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP9]], i64 2 +; COST10-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 2 +; COST10-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 2 +; COST10-NEXT: store i8 0, ptr [[TMP12]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP13]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP14]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP15]], align 1 +; COST10-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP8]], i64 1 +; COST10-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1 +; COST10-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP10]], i64 
1 +; COST10-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP11]], i64 1 +; COST10-NEXT: store i8 0, ptr [[TMP16]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP17]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP18]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP19]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP8]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP9]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP10]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP11]], align 1 +; COST10-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 +; COST10-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; COST10-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100 +; COST10-NEXT: br i1 [[TMP20]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; COST10: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; COST10-NEXT: br i1 false, [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; COST10: [[VEC_EPILOG_SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.3 = mul i64 %iv, 3 + %gep.0 = getelementptr i8, ptr %dst, i64 %iv.3 + %gep.2 = getelementptr i8, ptr %gep.0, i64 2 + store i8 0, ptr %gep.2, align 1 + %gep.1 = getelementptr i8, ptr %gep.0, i64 1 + store i8 0, ptr %gep.1, align 1 + store i8 0, ptr %gep.0, align 1 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) } +attributes #1 = { "target-cpu"="neoverse-512tvb" } declare void @llvm.assume(i1 noundef) declare i64 @llvm.umin.i64(i64, i64) diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index ae772da8862b3..cdbe9bb555834 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ 
b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -755,7 +755,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 12688 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 12696 ; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 12828 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP8]] ; CHECK-NEXT: [[BOUND2:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] @@ -764,88 +764,59 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE9:.*]] ] +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] ; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16 ; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[GEP_SRC]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[TMP22]], i32 1 ; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP22]], i64 152 ; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[GEP_FLAG]], align 4, !alias.scope [[META78:![0-9]+]] ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META78]] ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP27]], i32 0 ; CHECK-NEXT: [[TMP30:%.*]] = 
insertelement <2 x i32> [[TMP29]], i32 [[TMP28]], i32 1 -; CHECK-NEXT: [[TMP31:%.*]] = icmp eq <2 x i32> [[TMP30]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <2 x i32> [[TMP30]], zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[GEP_SRC]], align 8, !alias.scope [[META78]] ; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP22]], align 8, !alias.scope [[META78]] ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[TMP31]], splat (i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true) ; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00) -; CHECK-NEXT: [[GEP_DST1_ELSE:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x ptr> poison, ptr [[GEP_DST1_ELSE]], i32 0 -; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x ptr> [[TMP38]], ptr [[TMP37]], i32 1 -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP33]], i32 0 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x double> [[TMP34]], i32 0 -; CHECK-NEXT: store double [[TMP41]], ptr [[GEP_DST1_ELSE]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]] -; CHECK-NEXT: [[GEP_SRC_16:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 16 -; CHECK-NEXT: [[TMP43:%.*]] = load double, ptr [[GEP_SRC_16]], align 8, !alias.scope [[META78]] -; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x double> poison, double [[TMP43]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP45:%.*]] = phi <2 x double> [ poison, %[[VECTOR_BODY]] ], [ [[TMP44]], 
%[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i1> [[TMP33]], i32 1 -; CHECK-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF2:.*]], label %[[PRED_LOAD_CONTINUE3:.*]] -; CHECK: [[PRED_LOAD_IF2]]: -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x double> [[TMP34]], i32 1 -; CHECK-NEXT: store double [[TMP47]], ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP22]], i64 16 -; CHECK-NEXT: [[TMP49:%.*]] = load double, ptr [[TMP48]], align 8, !alias.scope [[META78]] -; CHECK-NEXT: [[TMP50:%.*]] = insertelement <2 x double> [[TMP45]], double [[TMP49]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE3]] -; CHECK: [[PRED_LOAD_CONTINUE3]]: -; CHECK-NEXT: [[TMP51:%.*]] = phi <2 x double> [ [[TMP45]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP50]], %[[PRED_LOAD_IF2]] ] -; CHECK-NEXT: [[TMP53:%.*]] = fmul <2 x double> splat (double 2.000000e+01), [[TMP51]] -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <2 x i1> [[TMP33]], i32 0 -; CHECK-NEXT: br i1 [[TMP54]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0 +; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[GEP_DST2_ELSE:%.*]] = getelementptr i8, ptr [[GEP_DST1_ELSE]], i64 8 -; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x double> [[TMP53]], i32 0 -; CHECK-NEXT: store double [[TMP56]], ptr [[GEP_DST2_ELSE]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP34]], i32 0 +; CHECK-NEXT: store double [[TMP19]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP57:%.*]] = extractelement <2 x i1> 
[[TMP33]], i32 1 -; CHECK-NEXT: br i1 [[TMP57]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1 +; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] +; CHECK: [[PRED_STORE_IF2]]: +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP34]], i32 1 +; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] +; CHECK: [[PRED_STORE_CONTINUE3]]: +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] ; CHECK: [[PRED_STORE_IF4]]: -; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[TMP37]], i64 8 -; CHECK-NEXT: [[TMP59:%.*]] = extractelement <2 x double> [[TMP53]], i32 1 -; CHECK-NEXT: store double [[TMP59]], ptr [[TMP58]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store double [[TMP13]], ptr [[TMP31]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP31]], i64 16 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] ; CHECK: [[PRED_STORE_CONTINUE5]]: -; CHECK-NEXT: [[TMP60:%.*]] = extractelement <2 x i1> [[TMP31]], i32 0 -; CHECK-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] ; CHECK: [[PRED_STORE_IF6]]: -; CHECK-NEXT: [[TMP62:%.*]] = getelementptr double, ptr [[DST]], i64 
[[IV]] -; CHECK-NEXT: store double [[TMP13]], ptr [[TMP62]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[TMP62]], i64 8 -; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP64]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] +; CHECK-NEXT: store double [[TMP14]], ptr [[TMP32]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] ; CHECK: [[PRED_STORE_CONTINUE7]]: -; CHECK-NEXT: [[TMP66:%.*]] = extractelement <2 x i1> [[TMP31]], i32 1 -; CHECK-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_IF8]]: -; CHECK-NEXT: [[TMP68:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: store double [[TMP14]], ptr [[TMP68]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr [[TMP68]], i64 8 -; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP70]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP52]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP83:![0-9]+]] @@ -865,6 +836,117 @@ loop: %v.1 = load double, ptr %gep.src, align 8 br i1 %cmp, label %then, label %else +then: + %gep.dst1.then = getelementptr double, ptr %dst, i64 %iv + store double %v.1, ptr %gep.dst1.then, align 8 + %gep.dst2.then = getelementptr i8, ptr %gep.dst1.then, i64 16 + store double 10.0, ptr %gep.dst2.then, align 8 + br label %loop.latch + +else: 
+ %r.1 = fadd double %v.1, 8.0 + %gep.dst1.else = getelementptr double, ptr %dst, i64 %iv + store double %r.1, ptr %gep.dst1.else, align 8 + br label %loop.latch + +loop.latch: + %iv.next = add i64 %iv, 16 + %exit.cond = icmp eq i64 %iv.next, 1600 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} + +; Same as @sink_multiple_store_groups_noalias_via_scev, but the offset between +; store groups is only 8, which means the alias across VFs. +define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) { +; CHECK-LABEL: define void @sink_multiple_store_groups_alias_via_scev( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 12688 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 12828 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP3]], i64 152 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[GEP_FLAG]], align 4, !alias.scope [[META85:![0-9]+]] +; 
CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META85]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <2 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[GEP_SRC]], align 8, !alias.scope [[META85]] +; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP3]], align 8, !alias.scope [[META85]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP12]], splat (i1 true) +; CHECK-NEXT: [[TMP18:%.*]] = fadd <2 x double> [[TMP16]], splat (double 8.000000e+00) +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0 +; CHECK-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0 +; CHECK-NEXT: store double [[TMP19]], ptr [[TMP20]], align 8, !alias.scope [[META88:![0-9]+]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP17]], i32 1 +; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] +; CHECK: [[PRED_STORE_IF2]]: +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[TMP18]], i32 1 +; CHECK-NEXT: store double [[TMP22]], ptr [[TMP21]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] +; CHECK: [[PRED_STORE_CONTINUE3]]: +; CHECK-NEXT: [[TMP42:%.*]] = extractelement 
<2 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] +; CHECK: [[PRED_STORE_IF4]]: +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store double [[TMP13]], ptr [[TMP43]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[TMP43]], i64 8 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP44]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] +; CHECK: [[PRED_STORE_CONTINUE5]]: +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 +; CHECK-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] +; CHECK: [[PRED_STORE_IF6]]: +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP1]] +; CHECK-NEXT: store double [[TMP14]], ptr [[TMP46]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP46]], i64 8 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] +; CHECK: [[PRED_STORE_CONTINUE7]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP90:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.src = getelementptr double, ptr %src, i64 %iv + %gep.flag = getelementptr i8, ptr %gep.src, i64 152 + %c = load i32, ptr %gep.flag, align 4 + %cmp = icmp eq i32 %c, 0 + %v.1 = load double, ptr %gep.src, align 8 + br i1 %cmp, label %then, label %else + then: %gep.dst1.then = getelementptr double, ptr %dst, i64 %iv 
store double %v.1, ptr %gep.dst1.then, align 8 @@ -876,11 +958,6 @@ else: %r.1 = fadd double %v.1, 8.0 %gep.dst1.else = getelementptr double, ptr %dst, i64 %iv store double %r.1, ptr %gep.dst1.else, align 8 - %gep.src.16 = getelementptr i8, ptr %gep.src, i64 16 - %v.3 = load double, ptr %gep.src.16, align 8 - %r.2 = fmul double 20.0, %v.3 - %gep.dst2.else = getelementptr i8, ptr %gep.dst1.else, i64 8 - store double %r.2, ptr %gep.dst2.else, align 8 br label %loop.latch loop.latch: @@ -988,20 +1065,20 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META85:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META92:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] -; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4, !alias.scope [[META88:![0-9]+]], !noalias [[META85]] +; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4, !alias.scope [[META95:![0-9]+]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] ; CHECK: [[PRED_STORE_IF2]]: ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] -; CHECK-NEXT: store i32 1, ptr [[TMP7]], align 4, !alias.scope [[META88]], !noalias [[META85]] 
+; CHECK-NEXT: store i32 1, ptr [[TMP7]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_CONTINUE3]]: ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) @@ -1012,14 +1089,14 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr ; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] ; CHECK: [[PRED_STORE_IF4]]: ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] -; CHECK-NEXT: store i32 2, ptr [[TMP13]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: store i32 2, ptr [[TMP13]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] ; CHECK: [[PRED_STORE_CONTINUE5]]: ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 ; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] ; CHECK: [[PRED_STORE_IF6]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] -; CHECK-NEXT: store i32 2, ptr [[TMP15]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: store i32 2, ptr [[TMP15]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] ; CHECK: [[PRED_STORE_CONTINUE7]]: ; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 9) @@ -1028,19 +1105,19 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr ; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] ; CHECK: [[PRED_STORE_IF8]]: ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] -; CHECK-NEXT: store i32 3, ptr [[TMP19]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: store i32 3, ptr [[TMP19]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: 
br label %[[PRED_STORE_CONTINUE9]] ; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP17]], i32 1 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] ; CHECK: [[PRED_STORE_IF10]]: ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] -; CHECK-NEXT: store i32 3, ptr [[TMP21]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: store i32 3, ptr [[TMP21]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] ; CHECK: [[PRED_STORE_CONTINUE11]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP90:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP97:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/unittests/CodeGen/GlobalISel/InstructionSelectTest.cpp b/llvm/unittests/CodeGen/GlobalISel/InstructionSelectTest.cpp index 7fbccf7160e17..223798342b3ee 100644 --- a/llvm/unittests/CodeGen/GlobalISel/InstructionSelectTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/InstructionSelectTest.cpp @@ -59,10 +59,8 @@ TEST_F(AArch64GISelMITest, TestInstructionSelectErase) { GTEST_SKIP(); legacy::PassManager PM; - std::unique_ptr TPC(TM->createPassConfig(PM)); EraseMockInstructionSelector ISel; - ISel.TPC = TPC.get(); for (auto &MI : *EntryMBB) { ISel.MIs.push_back(&MI); } diff --git a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn index 679373d741661..ac63bbc6ee3b3 100644 --- a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn @@ -35,6 +35,7 @@ static_library("Target") { sources = [ "ABI.cpp", 
"AssertFrameRecognizer.cpp", + "BorrowedStackFrame.cpp", "CoreFileMemoryRanges.cpp", "DynamicRegisterInfo.cpp", "ExecutionContext.cpp", diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index 835025d1e319e..980f99687c4cc 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -17,7 +17,6 @@ CodeGen/RISCV/zmmul.ll CodeGen/WebAssembly/memory-interleave.ll CodeGen/X86/AMX/amx-low-intrinsics.ll CodeGen/X86/masked_gather_scatter.ll -CodeGen/X86/nocfivalue.ll DebugInfo/AArch64/ir-outliner.ll DebugInfo/assignment-tracking/X86/hotcoldsplit.ll DebugInfo/Generic/block-asan.ll @@ -148,9 +147,6 @@ Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll Transforms/ExpandMemCmp/AArch64/memcmp.ll Transforms/ExpandMemCmp/X86/memcmp.ll Transforms/ExpandMemCmp/X86/memcmp-x32.ll -Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll -Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll -Transforms/ExpandVariadics/intrinsics.ll Transforms/FixIrreducible/basic.ll Transforms/FixIrreducible/bug45623.ll Transforms/FixIrreducible/callbr.ll @@ -472,9 +468,6 @@ Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll Transforms/LoopIdiom/AArch64/byte-compare-index.ll Transforms/LoopIdiom/AArch64/find-first-byte.ll Transforms/LoopIdiom/RISCV/byte-compare-index.ll -Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll -Transforms/LoopUnroll/peel-last-iteration-with-guards.ll -Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll Transforms/LowerAtomic/atomic-load.ll Transforms/LowerAtomic/atomic-swap.ll Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll @@ -505,41 +498,15 @@ Transforms/LowerSwitch/do-not-handle-impossible-values.ll Transforms/LowerSwitch/feature.ll Transforms/LowerSwitch/fold-popular-case-to-unreachable-default.ll Transforms/LowerSwitch/pr59316.ll -Transforms/LowerTypeTests/aarch64-jumptable.ll -Transforms/LowerTypeTests/blockaddress-2.ll 
-Transforms/LowerTypeTests/blockaddress.ll -Transforms/LowerTypeTests/cfi-annotation.ll Transforms/LowerTypeTests/cfi-coff-comdat-rename.ll -Transforms/LowerTypeTests/cfi-direct-call1.ll -Transforms/LowerTypeTests/cfi-icall-alias.ll -Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll -Transforms/LowerTypeTests/cfi-unwind-direct-call.ll -Transforms/LowerTypeTests/export-alias.ll -Transforms/LowerTypeTests/export-cross-dso-cfi.ll -Transforms/LowerTypeTests/export-icall.ll -Transforms/LowerTypeTests/export-rename-local.ll -Transforms/LowerTypeTests/export-symver.ll -Transforms/LowerTypeTests/function-arm-thumb.ll -Transforms/LowerTypeTests/function-disjoint.ll -Transforms/LowerTypeTests/function-ext.ll Transforms/LowerTypeTests/function.ll -Transforms/LowerTypeTests/function-thumb-bti.ll Transforms/LowerTypeTests/function-weak.ll -Transforms/LowerTypeTests/icall-branch-funnel.ll Transforms/LowerTypeTests/import.ll -Transforms/LowerTypeTests/nocfivalue.ll -Transforms/LowerTypeTests/pr37625.ll -Transforms/LowerTypeTests/section.ll Transforms/LowerTypeTests/simple.ll -Transforms/LowerTypeTests/x86-jumptable.ll -Transforms/MemCpyOpt/memset-memcpy-dbgloc.ll -Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll -Transforms/MemCpyOpt/opaque-ptr.ll Transforms/MergeFunc/2011-02-08-RemoveEqual.ll Transforms/MergeFunc/apply_function_attributes.ll Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll Transforms/MergeFunc/call-and-invoke-with-ranges.ll -Transforms/MergeFunc/cfi-thunk-merging.ll Transforms/MergeFunc/comdat.ll Transforms/MergeFunc/crash-cast-arrays.ll Transforms/MergeFunc/crash.ll @@ -572,10 +539,6 @@ Transforms/MergeFunc/ranges-multiple.ll Transforms/MergeFunc/self-referential-global.ll Transforms/MergeFunc/unnamed-addr-reprocessing.ll Transforms/MergeFunc/vector-GEP-crash.ll -Transforms/MergeICmps/X86/alias-merge-blocks.ll -Transforms/MergeICmps/X86/entry-block-shuffled-2.ll -Transforms/MergeICmps/X86/entry-block-shuffled.ll 
-Transforms/MergeICmps/X86/pr59740.ll Transforms/OpenMP/always_inline_device.ll Transforms/OpenMP/custom_state_machines.ll Transforms/OpenMP/custom_state_machines_remarks.ll diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index 3581b07dc4e3e..16eaf28ddd95b 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -1229,15 +1229,13 @@ def AMDGPU_ScaledMFMAOp : def AMDGPU_MakeDmaBaseOp : AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments]>, - Arguments<(ins - Arg:$src, - Variadic:$src_indices, - Arg:$dst, - Variadic:$dst_indices)>, + Arguments<(ins Arg:$global, + Variadic:$global_indices, + Arg:$lds, + Variadic:$lds_indices)>, Results<(outs AMDGPU_TDMBaseType: $base)> { // TODO: - // * Add verifiers such that one of the memrefs is from LDS and the other global. // * Add verifiers to make sure that the number of indices do not exceed the number of dimensions. let summary = "Pair of based addresses used when moving tiles between LDS and global memory."; @@ -1251,7 +1249,7 @@ def AMDGPU_MakeDmaBaseOp : For example: ```mlir - %base = amdgpu.make_dma_base %src[%idx0], %dst[%idx1] : memref<8xi32>, memref<8xi32, #gpu.address_space> -> !amdgpu.tdm_base + %base = amdgpu.make_dma_base %global[%idx0, %idx1], %lds[%idx2, %idx3] : memref<64x64xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base %descriptor = amdgpu.make_dma_descriptor %base globalSize [2, 2] globalStride [2, 1] sharedSize [2, 2] : !amdgpu.tdm_base -> !amdgpu.tdm_descriptor amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor ``` @@ -1259,27 +1257,31 @@ def AMDGPU_MakeDmaBaseOp : to ```mlir - // pseudocode - %base_0 = llvm.mlir.undef : !llvm.struct<(ptr, ptr)> - %base_1 = llvm.insertvalue %global_addr, %base_0[0] : !llvm.struct<(ptr, ptr)> - %base_2 = llvm.insertvalue %lds_addr, %base_1[1] : !llvm.struct(ptr, ptr)> - // type(%base_2) = !llvm.struct<(ptr, ptr) roughly 
corresponds to amdgpu.tdm_base - - // The base will be used when contructing dgroup0 - // when lowering amdgpu.make_dma_descriptor - %dgroup0_0 = llvm.mlir.undef : !llvm.struct<(....)> - %dgroup0_1 = llvm.insertvalue %base2, %dgroup0_0 : .... - - // When lowering amdgpu.tensor_load_to_lds - rocdl.tensor.load.to.lds %dgroup0, %dgroup1, %dgroup2, %dgroup3 cachepolicy 0 : vector<4xi32>, vector<8xi32> + // pseudo-code + %global_base = llvm.extractvalue %global_memref[1] + %global_address = llvm.get_element_ptr ... + + %lds_base = llvm.extractvalue %lds_memref[1] + %lds_address = llvm.get_element_ptr ... + + // Definition of %base + %undef = llvm.mlir.undef : vector<4xi32> + %v0 = llvm.insertelement %15, %undef[0] : vector<4xi32> + %v1 = llvm.insertelement %lds_address, %v0[1] : vector<4xi32> + %v2 = llvm.insertelement %global_address_low, %v1[2] : vector<4xi32> + %base = llvm.insertelement %global_address_high, %v2[3] : vector<4xi32> + + rocdl.tensor.load.to.lds %base, %dgroup1, %dgroup2, %dgroup3 cachepolicy 0 : vector<4xi32>, vector<8xi32> ``` These tensor DMA operations were introduced in gfx1250. }]; let assemblyFormat = [{ - $src `[` $src_indices `]` `,` $dst `[` $dst_indices `]` attr-dict `:` type($src) `,` type($dst) `->` type(results) + $global `[` $global_indices `]` `,` $lds `[` $lds_indices `]` attr-dict `:` type($global) `,` type($lds) `->` type(results) }]; + + let hasVerifier = 1; } def AMDGPU_MakeDmaDescriptorOp : @@ -1323,12 +1325,12 @@ def AMDGPU_MakeDmaDescriptorOp : ```mlir // Example of moving a two-dimensional tensor to LDS. 
- %base = amdgpu.make_dma_base %src[0, 0], %dst[0, 0] : memref<64x64xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base + %base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<64x64xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base %descriptor = amdgpu.make_dma_descriptor %base globalSize [64, 64] globalStride [64, 1] sharedSize [64, 64] : !amdgpu.tdm_base -> !amdgpu.tdm_descriptor amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor // Example of moving a two dimension tensor to LDS where padding is applied after every integer. - %base = amdgpu.make_dma_base %src[0, 0], %dst[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base + %base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base %descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padding(%pad pad_every %pad_every) : !amdgpu.tdm_base -> !amdgpu.tdm_descriptor amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor ``` diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index b8317b4a1d2ec..77d1a6f8d53b5 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -3232,6 +3232,18 @@ def OpenACC_RoutineOp : OpenACC_Op<"routine", [IsolatedFromAbove]> { OptionalAttr:$gangDimDeviceType); let extraClassDeclaration = [{ + // 'create' function to generate an 'empty' routine. 
+ static RoutineOp create(::mlir::OpBuilder & builder, + ::mlir::Location location, + ::llvm::StringRef sym_name, + mlir::SymbolRefAttr func_name, bool implicit) { + return create(builder, location, sym_name, func_name, /*bindIDName=*/{}, + /*bindStrName=*/{}, /*bindIdNameDeviceType=*/{}, + /*bindStrnameDeviceType=*/{}, /*worker=*/{}, /*vector=*/{}, + /*seq=*/{}, /*nohost=*/false, implicit, /*gang=*/{}, + /*gangDim=*/{}, /*gangDimDeviceType=*/{}); + } + static StringRef getGangDimKeyword() { return "dim"; } /// Return true if the op has the worker attribute for the @@ -3267,6 +3279,13 @@ def OpenACC_RoutineOp : OpenACC_Op<"routine", [IsolatedFromAbove]> { std::optional<::std::variant> getBindNameValue(); std::optional<::std::variant> getBindNameValue(mlir::acc::DeviceType deviceType); + + // Add an entry to the 'seq' attribute for each additional device types. + void addSeq(MLIRContext *, llvm::ArrayRef); + // Add an entry to the 'vector' attribute for each additional device types. + void addVector(MLIRContext *, llvm::ArrayRef); + // Add an entry to the 'worker' attribute for each additional device types. + void addWorker(MLIRContext *, llvm::ArrayRef); }]; let assemblyFormat = [{ diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td index d1bbc7f206ce6..3f11bf6fbfce3 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td @@ -176,6 +176,50 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> { return false; }] >, + InterfaceMethod< + /*description=*/[{ + Generates a load operation from the pointer-like type. This dereferences + the pointer and returns the loaded value. + + The `srcPtr` parameter is the pointer to load from. If the current type is + represented in a way that it does not capture the pointee type, `valueType` + must be passed in to provide the necessary type information. 
+ + Returns the loaded value, or an empty Value if load generation failed. + }], + /*retTy=*/"::mlir::Value", + /*methodName=*/"genLoad", + /*args=*/(ins "::mlir::OpBuilder &":$builder, + "::mlir::Location":$loc, + "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$srcPtr, + "::mlir::Type":$valueType), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return {}; + }] + >, + InterfaceMethod< + /*description=*/[{ + Generates a store operation to the pointer-like type. This stores a value + to the memory location pointed to by the pointer. + + The `destPtr` parameter is the pointer to store to. The `valueToStore` + parameter is the value to be stored. The type information is derived from + the valueToStore parameter itself. + + Returns true if store was successfully generated, false otherwise. + }], + /*retTy=*/"bool", + /*methodName=*/"genStore", + /*args=*/(ins "::mlir::OpBuilder &":$builder, + "::mlir::Location":$loc, + "::mlir::Value":$valueToStore, + "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$destPtr), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return false; + }] + >, ]; } diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index b9a5e7d7f6eac..2b6938712dad2 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -2264,6 +2264,77 @@ struct AMDGPUPermlaneLowering : public ConvertOpToLLVMPattern { } }; +struct AMDGPUMakeDmaBaseLowering + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + AMDGPUMakeDmaBaseLowering(const LLVMTypeConverter &converter, Chipset chipset) + : ConvertOpToLLVMPattern(converter), chipset(chipset) {} + Chipset chipset; + + LogicalResult + matchAndRewrite(MakeDmaBaseOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + if (chipset < kGfx1250) + return op->emitOpError("make_dma_base is only supported on gfx1250"); 
+ + Location loc = op.getLoc(); + + ValueRange ldsIndices = adaptor.getLdsIndices(); + Value lds = adaptor.getLds(); + auto ldsMemRefType = cast(op.getLds().getType()); + + Value ldsPtr = + getStridedElementPtr(rewriter, loc, ldsMemRefType, lds, ldsIndices); + + ValueRange globalIndices = adaptor.getGlobalIndices(); + Value global = adaptor.getGlobal(); + auto globalMemRefType = cast(op.getGlobal().getType()); + + Value globalPtr = getStridedElementPtr(rewriter, loc, globalMemRefType, + global, globalIndices); + + Type i32 = rewriter.getI32Type(); + Type i64 = rewriter.getI64Type(); + + Value castForLdsAddr = LLVM::PtrToIntOp::create(rewriter, loc, i32, ldsPtr); + Value castForGlobalAddr = + LLVM::PtrToIntOp::create(rewriter, loc, i64, globalPtr); + + Value lowHalf = + LLVM::TruncOp::create(rewriter, loc, i32, castForGlobalAddr); + + Value shift = LLVM::LShrOp::create(rewriter, loc, castForGlobalAddr, + createI64Constant(rewriter, loc, 32)); + + Value highHalf = LLVM::TruncOp::create(rewriter, loc, i32, shift); + + Value mask = createI32Constant(rewriter, loc, (1ull << 25) - 1); + Value validHighHalf = LLVM::AndOp::create(rewriter, loc, highHalf, mask); + + Value typeField = createI32Constant(rewriter, loc, 2 << 30); + Value highHalfPlusType = + LLVM::OrOp::create(rewriter, loc, validHighHalf, typeField); + + Value c0 = createI32Constant(rewriter, loc, 0); + Value c1 = createI32Constant(rewriter, loc, 1); + Value c2 = createI32Constant(rewriter, loc, 2); + Value c3 = createI32Constant(rewriter, loc, 3); + + Type v4i32 = this->typeConverter->convertType(VectorType::get(4, i32)); + Value result = LLVM::PoisonOp::create(rewriter, loc, v4i32); + result = LLVM::InsertElementOp::create(rewriter, loc, result, c1, c0); + result = LLVM::InsertElementOp::create(rewriter, loc, result, + castForLdsAddr, c1); + result = LLVM::InsertElementOp::create(rewriter, loc, result, lowHalf, c2); + result = LLVM::InsertElementOp::create(rewriter, loc, result, + highHalfPlusType, c3); + + 
rewriter.replaceOp(op, result); + return success(); + } +}; + struct ConvertAMDGPUToROCDLPass : public impl::ConvertAMDGPUToROCDLPassBase { using Base::Base; @@ -2278,6 +2349,10 @@ struct ConvertAMDGPUToROCDLPass RewritePatternSet patterns(ctx); LLVMTypeConverter converter(ctx); + converter.addConversion([&](TDMBaseType type) -> Type { + Type i32 = IntegerType::get(type.getContext(), 32); + return converter.convertType(VectorType::get(4, i32)); + }); populateAMDGPUToROCDLConversionPatterns(converter, patterns, *maybeChipset); LLVMConversionTarget target(getContext()); target.addIllegalDialect<::mlir::amdgpu::AMDGPUDialect>(); @@ -2333,6 +2408,7 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter, ScaledExtPackedOpLowering, PackedScaledTruncOpLowering, PackedTrunc2xFp8OpLowering, PackedStochRoundFp8OpLowering, GatherToLDSOpLowering, TransposeLoadOpLowering, - AMDGPUPermlaneLowering>(converter, chipset); + AMDGPUPermlaneLowering, AMDGPUMakeDmaBaseLowering>(converter, + chipset); patterns.add(converter); } diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp index 93cb9b38a5ecf..8b58c3b1dd182 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp +++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp @@ -705,6 +705,24 @@ LogicalResult TransposeLoadOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// MakeDmaBaseOp +//===----------------------------------------------------------------------===// + +LogicalResult MakeDmaBaseOp::verify() { + MemRefType ldsType = cast(getLds().getType()); + MemRefType globalType = cast(getGlobal().getType()); + if (!hasWorkgroupMemorySpace(ldsType.getMemorySpace())) { + return emitOpError( + "lds memref must have workgroup address space attribute."); + } + if (!hasGlobalMemorySpace(globalType.getMemorySpace())) { + return emitOpError( + "global memref must have global address space 
attribute."); + } + return success(); +} + //===----------------------------------------------------------------------===// // MakeDmaDescriptorOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/MaskedloadToLoad.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/MaskedloadToLoad.cpp index f15c63c166e0a..89ef51f922cad 100644 --- a/mlir/lib/Dialect/AMDGPU/Transforms/MaskedloadToLoad.cpp +++ b/mlir/lib/Dialect/AMDGPU/Transforms/MaskedloadToLoad.cpp @@ -33,19 +33,18 @@ using namespace mlir::amdgpu; /// This pattern supports lowering of: `vector.maskedload` to `vector.load` /// and `arith.select` if the memref is in buffer address space. -static LogicalResult baseInBufferAddrSpace(PatternRewriter &rewriter, - vector::MaskedLoadOp maskedOp) { - auto memRefType = dyn_cast(maskedOp.getBase().getType()); +static LogicalResult hasBufferAddressSpace(Type type) { + auto memRefType = dyn_cast(type); if (!memRefType) - return rewriter.notifyMatchFailure(maskedOp, "not a memref source"); + return failure(); Attribute addrSpace = memRefType.getMemorySpace(); if (!isa_and_nonnull(addrSpace)) - return rewriter.notifyMatchFailure(maskedOp, "no address space"); + return failure(); if (dyn_cast(addrSpace).getValue() != amdgpu::AddressSpace::FatRawBuffer) - return rewriter.notifyMatchFailure(maskedOp, "not in buffer address space"); + return failure(); return success(); } @@ -83,10 +82,11 @@ struct MaskedLoadLowering final : OpRewritePattern { LogicalResult matchAndRewrite(vector::MaskedLoadOp maskedOp, PatternRewriter &rewriter) const override { if (maskedOp->hasAttr(kMaskedloadNeedsMask)) - return failure(); + return rewriter.notifyMatchFailure(maskedOp, "already rewritten"); - if (failed(baseInBufferAddrSpace(rewriter, maskedOp))) { - return failure(); + if (failed(hasBufferAddressSpace(maskedOp.getBase().getType()))) { + return rewriter.notifyMatchFailure( + maskedOp, "isn't a load from a fat buffer resource"); } 
// Check if this is either a full inbounds load or an empty, oob load. If @@ -176,9 +176,14 @@ struct FullMaskedLoadToConditionalLoad LogicalResult matchAndRewrite(vector::MaskedLoadOp loadOp, PatternRewriter &rewriter) const override { + if (succeeded(hasBufferAddressSpace(loadOp.getBase().getType()))) + return rewriter.notifyMatchFailure( + loadOp, "buffer loads are handled by a more specialized pattern"); + FailureOr maybeCond = matchFullMask(rewriter, loadOp.getMask()); if (failed(maybeCond)) { - return failure(); + return rewriter.notifyMatchFailure(loadOp, + "isn't loading a broadcasted scalar"); } Value cond = maybeCond.value(); @@ -203,6 +208,15 @@ struct FullMaskedStoreToConditionalStore LogicalResult matchAndRewrite(vector::MaskedStoreOp storeOp, PatternRewriter &rewriter) const override { + // A condition-free implementation of fully masked stores requires + // 1) an accessor for the num_records field on buffer resources/fat pointers + // 2) knowledge that said field will always be set accurately - that is, + // that writes to x < num_records of offset wouldn't trap, which is + // something a pattern user would need to assert or we'd need to prove. + // + // Therefore, conditional stores to buffers still go down this path at + // present. + FailureOr maybeCond = matchFullMask(rewriter, storeOp.getMask()); if (failed(maybeCond)) { return failure(); diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 841d1d781f1a1..7039bbe1d11ec 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -203,12 +203,68 @@ struct MemRefPointerLikeModel return false; } + + mlir::Value genLoad(Type pointer, OpBuilder &builder, Location loc, + TypedValue srcPtr, + Type valueType) const { + // Load from a memref - only valid for scalar memrefs (rank 0). 
+ // This is because the address computation for memrefs is part of the load + // (and not computed separately), but the API does not have arguments for + // indexing. + auto memrefValue = dyn_cast_if_present>(srcPtr); + if (!memrefValue) + return {}; + + auto memrefTy = memrefValue.getType(); + + // Only load from scalar memrefs (rank 0) + if (memrefTy.getRank() != 0) + return {}; + + return memref::LoadOp::create(builder, loc, memrefValue); + } + + bool genStore(Type pointer, OpBuilder &builder, Location loc, + Value valueToStore, TypedValue destPtr) const { + // Store to a memref - only valid for scalar memrefs (rank 0) + // This is because the address computation for memrefs is part of the store + // (and not computed separately), but the API does not have arguments for + // indexing. + auto memrefValue = dyn_cast_if_present>(destPtr); + if (!memrefValue) + return false; + + auto memrefTy = memrefValue.getType(); + + // Only store to scalar memrefs (rank 0) + if (memrefTy.getRank() != 0) + return false; + + memref::StoreOp::create(builder, loc, valueToStore, memrefValue); + return true; + } }; struct LLVMPointerPointerLikeModel : public PointerLikeType::ExternalModel { Type getElementType(Type pointer) const { return Type(); } + + mlir::Value genLoad(Type pointer, OpBuilder &builder, Location loc, + TypedValue srcPtr, + Type valueType) const { + // For LLVM pointers, we need the valueType to determine what to load + if (!valueType) + return {}; + + return LLVM::LoadOp::create(builder, loc, valueType, srcPtr); + } + + bool genStore(Type pointer, OpBuilder &builder, Location loc, + Value valueToStore, TypedValue destPtr) const { + LLVM::StoreOp::create(builder, loc, valueToStore, destPtr); + return true; + } }; struct MemrefAddressOfGlobalModel @@ -4293,6 +4349,24 @@ RoutineOp::getGangDimValue(mlir::acc::DeviceType deviceType) { return std::nullopt; } +void RoutineOp::addSeq(MLIRContext *context, + llvm::ArrayRef effectiveDeviceTypes) { + 
setSeqAttr(addDeviceTypeAffectedOperandHelper(context, getSeqAttr(), + effectiveDeviceTypes)); +} + +void RoutineOp::addVector(MLIRContext *context, + llvm::ArrayRef effectiveDeviceTypes) { + setVectorAttr(addDeviceTypeAffectedOperandHelper(context, getVectorAttr(), + effectiveDeviceTypes)); +} + +void RoutineOp::addWorker(MLIRContext *context, + llvm::ArrayRef effectiveDeviceTypes) { + setWorkerAttr(addDeviceTypeAffectedOperandHelper(context, getWorkerAttr(), + effectiveDeviceTypes)); +} + //===----------------------------------------------------------------------===// // InitOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp index f2b0e71c9397f..59a1ad9dbe189 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp @@ -517,8 +517,7 @@ void LayoutInfoPropagation::visitPrefetchNdOp( auto [bWidth, bHeight, bCount] = blockWHC.value(); SmallVector instData; int instWidth = xegpu::getLargestDivisor( - static_cast(tdescTy.getDimSize(tdescTy.getRank() - 1)), bWidth, - bCount); + static_cast(tdescTy.getDimSize(tdescTy.getRank() - 1)), bWidth); if (instWidth == -1) prefetch.emitWarning( "No suitable instruction multiple found for the given shape."); @@ -759,8 +758,7 @@ void LayoutInfoPropagation::visitStoreNdOp( auto [bWidth, bHeight, bCount] = blockWHC.value(); SmallVector instData; int instWidth = xegpu::getLargestDivisor( - static_cast(dataTy.getDimSize(dataTy.getRank() - 1)), bWidth, - bCount); + static_cast(dataTy.getDimSize(dataTy.getRank() - 1)), bWidth); if (instWidth == -1) store.emitWarning( "No suitable instruction multiple found for the given shape."); diff --git a/mlir/test/Conversion/AMDGPUToROCDL/cvt_scale_pk-gfx1250.mlir b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir similarity index 81% rename from 
mlir/test/Conversion/AMDGPUToROCDL/cvt_scale_pk-gfx1250.mlir rename to mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir index d2391140ce056..27daea58f8f92 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/cvt_scale_pk-gfx1250.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir @@ -162,3 +162,51 @@ func.func @amdgpu.scaled_ext_packed816_invalid_dst_elem_type(%v: vector<16xf6E3M %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E3M2FN>, vector<4xf8E8M0FNU> -> vector<16xf64> return %ret0: vector<16xf64> } + +// ----- + +#gpu_global_addrspace = 1 +#gpu_lds_addrspace = 3 +#amdgpu_fat_buffer_addrspace = 7 + +// CHECK-LABEL: func @make_dma_base +// CHECK-SAME: (%[[IDX:.+]]: index, %[[MEM:.+]]: memref<8xi32, 1>, %[[SMEM:.+]]: memref<8xi32, 3>) +func.func @make_dma_base(%idx: index, %mem: memref<8xi32, #gpu_global_addrspace>, %smem: memref<8xi32,#gpu_lds_addrspace>) -> (!amdgpu.tdm_base) { + // CHECK-DAG: %[[INT:.+]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64 + // CHECK-DAG: %[[MEMREF_DESC_MEM:.+]] = builtin.unrealized_conversion_cast %[[MEM]] : memref<8xi32, 1> + // CHECK-DAG: %[[MEMREF_DESC_SMEM:.+]] = builtin.unrealized_conversion_cast %[[SMEM]] : memref<8xi32, 3> + + // CHECK-DAG: %[[MEM_BASE_PTR:.+]] = llvm.extractvalue %[[MEMREF_DESC_MEM]][1] : !llvm.struct<(ptr<1> + // CHECK-DAG: %[[SMEM_BASE_PTR:.+]] = llvm.extractvalue %[[MEMREF_DESC_SMEM]][1] : !llvm.struct<(ptr<3> + + // CHECK-DAG: %[[MEM_BASE_OFFSET:.+]] = llvm.getelementptr %[[MEM_BASE_PTR]][%[[INT]]] + // CHECK-DAG: %[[SMEM_BASE_OFFSET:.+]] = llvm.getelementptr %[[SMEM_BASE_PTR]][%[[INT]]] + + // CHECK-DAG: %[[MEM_INT:.+]] = llvm.ptrtoint %[[MEM_BASE_OFFSET]] : !llvm.ptr<1> to i64 + // CHECK-DAG: %[[SMEM_INT:.+]] = llvm.ptrtoint %[[SMEM_BASE_OFFSET]] : !llvm.ptr<3> to i32 + + // CHECK: %[[MEM_INT_LOW:.+]] = llvm.trunc %[[MEM_INT]] : i64 to i32 + // CHECK-DAG: %[[SHIFT:.+]] = llvm.mlir.constant(32 : i64) + // CHECK: 
%[[SHIFTED_MEM_INT:.+]] = llvm.lshr %[[MEM_INT]], %[[SHIFT]] + // CHECK: %[[MEM_INT_HIGH:.+]] = llvm.trunc %[[SHIFTED_MEM_INT]] : i64 to i32 + // CHECK-DAG: %[[MASK:.+]] = llvm.mlir.constant(33554431 : i32) + // CHECK: %[[VALID_MEM_INT_HIGH:.+]] = llvm.and %[[MEM_INT_HIGH]], %[[MASK]] + + // CHECK-DAG: %[[TYPE_FIELD:.+]] = llvm.mlir.constant(-2147483648 : i32) + // CHECK: %[[MEM_INT_HIGH_TYPE:.+]] = llvm.or %[[VALID_MEM_INT_HIGH]], %[[TYPE_FIELD]] + + // CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(2 : i32) : i32 + // CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(3 : i32) : i32 + + // CHECK: %[[V4I32_0_0:.+]] = llvm.mlir.poison : vector<4xi32> + // CHECK: %[[V4I32_0_1:.+]] = llvm.insertelement %[[C1]], %[[V4I32_0_0]][%[[C0]] : i32] + // CHECK: %[[V4I32_0_2:.+]] = llvm.insertelement %[[SMEM_INT]], %[[V4I32_0_1]][%[[C1]] : i32] + // CHECK: %[[V4I32_0_3:.+]] = llvm.insertelement %[[MEM_INT_LOW]], %[[V4I32_0_2]][%[[C2]] : i32] + // CHECK: %[[V4I32_0_4:.+]] = llvm.insertelement %[[MEM_INT_HIGH_TYPE]], %[[V4I32_0_3]][%[[C3]] : i32] + + %0 = amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu_global_addrspace>, memref<8xi32, #gpu_lds_addrspace> -> !amdgpu.tdm_base + + func.return %0 : !amdgpu.tdm_base +} diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir index 5b3a79d14cb1a..b915bfa324c77 100644 --- a/mlir/test/Dialect/AMDGPU/invalid.mlir +++ b/mlir/test/Dialect/AMDGPU/invalid.mlir @@ -357,6 +357,20 @@ func.func @scaled_mfma_invalid_k(%arg0 : vector<4xf8E8M0FNU>, %arg1 : vector<32x // ----- +func.func @make_dma_base_invalid_addressspace(%idx: index, %mem: memref<8xi32>) { + // expected-error@+1 {{'amdgpu.make_dma_base' op lds memref must have workgroup address space attribute.}} + amdgpu.make_dma_base %mem[%idx], %mem[%idx] : memref<8xi32>, memref<8xi32> -> !amdgpu.tdm_base +} + +// ----- + 
+func.func @make_dma_base_invalid_addressspace(%idx: index, %smem : memref<8xi32, #gpu.address_space>) { + // expected-error@+1 {{'amdgpu.make_dma_base' op global memref must have global address space attribute.}} + amdgpu.make_dma_base %smem[%idx], %smem[%idx] : memref<8xi32, #gpu.address_space>, memref<8xi32, #gpu.address_space> -> !amdgpu.tdm_base +} + +// ----- + func.func @make_dma_base_invalid_barrier(%base: !amdgpu.tdm_base, %barrier: memref<8xi32>, %idx: index) { // expected-error@+1 {{'amdgpu.make_dma_descriptor' op atomic barrier address must be in LDS.}} amdgpu.make_dma_descriptor %base globalSize [0] globalStride [1] sharedSize [0] atomicBarrier(%barrier[%idx] : memref<8xi32>) : !amdgpu.tdm_base -> !amdgpu.tdm_descriptor diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir index 390ad8cb8c1a5..3260bd4a8df9a 100644 --- a/mlir/test/Dialect/AMDGPU/ops.mlir +++ b/mlir/test/Dialect/AMDGPU/ops.mlir @@ -691,9 +691,6 @@ func.func @memory_counter_wait() { func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32, #gpu.address_space>) { // CHECK: amdgpu.make_dma_base %[[MEM]][%[[IDX]]], %[[SMEM]][%[[IDX]]] : memref<8xi32>, memref<8xi32, #gpu.address_space> -> !amdgpu.tdm_base amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32>, memref<8xi32, #gpu.address_space> -> !amdgpu.tdm_base - - // CHECK: amdgpu.make_dma_base %[[SMEM]][%[[IDX]]], %[[MEM]][%[[IDX]]] : memref<8xi32, #gpu.address_space>, memref<8xi32> -> !amdgpu.tdm_base - amdgpu.make_dma_base %smem[%idx], %mem[%idx] : memref<8xi32, #gpu.address_space>, memref<8xi32> -> !amdgpu.tdm_base func.return } @@ -748,3 +745,4 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base, %barrier: memref<8x func.return } + diff --git a/mlir/test/Dialect/OpenACC/pointer-like-interface-load.mlir b/mlir/test/Dialect/OpenACC/pointer-like-interface-load.mlir new file mode 100644 index 0000000000000..36df6a1d1bbe3 --- /dev/null +++ 
b/mlir/test/Dialect/OpenACC/pointer-like-interface-load.mlir @@ -0,0 +1,29 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=load}))" 2>&1 | FileCheck %s + +func.func @test_memref_load_scalar() { + %ptr = memref.alloca() {test.ptr} : memref + // CHECK: Successfully generated load for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Loaded value type: f32 + // CHECK: Generated: %{{.*}} = memref.load %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_load_int() { + %ptr = memref.alloca() {test.ptr} : memref + // CHECK: Successfully generated load for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Loaded value type: i64 + // CHECK: Generated: %{{.*}} = memref.load %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_load_dynamic() { + %c10 = arith.constant 10 : index + %ptr = memref.alloc(%c10) {test.ptr} : memref + // CHECK: Failed to generate load for operation: %[[PTR:.*]] = memref.alloc(%{{.*}}) {test.ptr} : memref + return +} + diff --git a/mlir/test/Dialect/OpenACC/pointer-like-interface-store.mlir b/mlir/test/Dialect/OpenACC/pointer-like-interface-store.mlir new file mode 100644 index 0000000000000..0fee43102d6d9 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/pointer-like-interface-store.mlir @@ -0,0 +1,39 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=store}))" 2>&1 | FileCheck %s + +func.func @test_memref_store_scalar() { + %ptr = memref.alloca() {test.ptr} : memref + // CHECK: Successfully generated store for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Generated: %[[VAL:.*]] = arith.constant 4.200000e+01 : f32 + // CHECK: Generated: memref.store %[[VAL]], %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_store_int() { + %ptr = memref.alloca() {test.ptr} : memref + // 
CHECK: Successfully generated store for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i32 + // CHECK: Generated: memref.store %[[VAL]], %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_store_i64() { + %ptr = memref.alloca() {test.ptr} : memref + // CHECK: Successfully generated store for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i64 + // CHECK: Generated: memref.store %[[VAL]], %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_store_dynamic() { + %c10 = arith.constant 10 : index + %ptr = memref.alloc(%c10) {test.ptr} : memref + // CHECK: Failed to generate store for operation: %[[PTR:.*]] = memref.alloc(%{{.*}}) {test.ptr} : memref + return +} + diff --git a/mlir/test/Dialect/Vector/vector-sink.mlir b/mlir/test/Dialect/Vector/vector-sink.mlir index 577b06df42929..beaba52af1841 100644 --- a/mlir/test/Dialect/Vector/vector-sink.mlir +++ b/mlir/test/Dialect/Vector/vector-sink.mlir @@ -780,7 +780,7 @@ func.func @negative_extract_load_scalable(%arg0: memref, %arg1: index) -> } //----------------------------------------------------------------------------- -// [Pattern: StoreOpFromSplatOrBroadcast] +// [Pattern: StoreOpFromBroadcast] //----------------------------------------------------------------------------- // CHECK-LABEL: @store_splat diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir index d911baa49acbb..32fb3178a8af2 100644 --- a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir +++ b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir @@ -6,6 +6,8 @@ // CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<8x16xf32> // CHECK: %[[TDESC_SRC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> // CHECK: %[[TDESC_DST:.*]] = 
xegpu.create_nd_tdesc %[[ARG1]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> +// CHECK: xegpu.prefetch_nd %[[TDESC_SRC]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> // CHECK: %[[LOADED:.*]] = xegpu.load_nd %0 <{layout = #xegpu.layout}> {layout_result_0 = #xegpu.layout} : // CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -> vector<8x32xf32> // CHECK: xegpu.store_nd %[[LOADED]], %[[TDESC_DST]] <{layout = #xegpu.layout}> : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32, #xegpu.layout> @@ -16,6 +18,7 @@ func.func @load_store_no_array_len(%arg0: memref<8x32xf32>, %arg1: memref<8x32xf %cst = arith.constant dense<0.000000e+00> : vector<8x16xf32> %0 = xegpu.create_nd_tdesc %arg0 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> %1 = xegpu.create_nd_tdesc %arg1 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> + xegpu.prefetch_nd %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<8x32xf32> %2 = xegpu.load_nd %0 : !xegpu.tensor_desc<8x32xf32> -> vector<8x32xf32> xegpu.store_nd %2, %1 : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32> return diff --git a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp index 027b0a1a8b80b..3ff0dc85b2152 100644 --- a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp +++ b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp @@ -46,7 +46,7 @@ struct TestPointerLikeTypeInterfacePass Pass::Option testMode{ *this, "test-mode", - llvm::cl::desc("Test mode: walk, alloc, copy, or free"), + llvm::cl::desc("Test mode: walk, alloc, copy, free, load, or store"), llvm::cl::init("walk")}; StringRef getArgument() const override { @@ -75,6 +75,10 @@ struct TestPointerLikeTypeInterfacePass void testGenCopy(Operation *srcOp, Operation *destOp, Value srcResult, Value destResult, PointerLikeType 
pointerType, OpBuilder &builder); + void testGenLoad(Operation *op, Value result, PointerLikeType pointerType, + OpBuilder &builder); + void testGenStore(Operation *op, Value result, PointerLikeType pointerType, + OpBuilder &builder, Value providedValue = {}); struct PointerCandidate { Operation *op; @@ -92,9 +96,12 @@ void TestPointerLikeTypeInterfacePass::runOnOperation() { auto func = getOperation(); OpBuilder builder(&getContext()); - if (testMode == "alloc" || testMode == "free") { + if (testMode == "alloc" || testMode == "free" || testMode == "load" || + testMode == "store") { // Collect all candidates first SmallVector candidates; + // For store mode, also look for a test value to use + Value testValue; func.walk([&](Operation *op) { if (op->hasAttr("test.ptr")) { for (auto result : op->getResults()) { @@ -105,6 +112,11 @@ void TestPointerLikeTypeInterfacePass::runOnOperation() { } } } + // Collect value marked with test.value for store tests + if (testMode == "store" && op->hasAttr("test.value")) { + if (op->getNumResults() > 0) + testValue = op->getResult(0); + } }); // Now test all candidates @@ -115,6 +127,12 @@ void TestPointerLikeTypeInterfacePass::runOnOperation() { else if (testMode == "free") testGenFree(candidate.op, candidate.result, candidate.pointerType, builder); + else if (testMode == "load") + testGenLoad(candidate.op, candidate.result, candidate.pointerType, + builder); + else if (testMode == "store") + testGenStore(candidate.op, candidate.result, candidate.pointerType, + builder, testValue); } } else if (testMode == "copy") { // Collect all source and destination candidates @@ -292,6 +310,105 @@ void TestPointerLikeTypeInterfacePass::testGenCopy( } } +void TestPointerLikeTypeInterfacePass::testGenLoad(Operation *op, Value result, + PointerLikeType pointerType, + OpBuilder &builder) { + Location loc = op->getLoc(); + + // Create a new builder with the listener and set insertion point + OperationTracker tracker; + OpBuilder 
newBuilder(op->getContext()); + newBuilder.setListener(&tracker); + newBuilder.setInsertionPointAfter(op); + + // Call the genLoad API + auto typedResult = cast>(result); + Value loadRes = pointerType.genLoad(newBuilder, loc, typedResult, Type()); + + if (loadRes) { + llvm::errs() << "Successfully generated load for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + llvm::errs() << "\tLoaded value type: "; + loadRes.getType().print(llvm::errs()); + llvm::errs() << "\n"; + + // Print all operations that were inserted + for (Operation *insertedOp : tracker.insertedOps) { + llvm::errs() << "\tGenerated: "; + insertedOp->print(llvm::errs()); + llvm::errs() << "\n"; + } + } else { + llvm::errs() << "Failed to generate load for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + } +} + +void TestPointerLikeTypeInterfacePass::testGenStore(Operation *op, Value result, + PointerLikeType pointerType, + OpBuilder &builder, + Value providedValue) { + Location loc = op->getLoc(); + + // Create a new builder with the listener and set insertion point + OperationTracker tracker; + OpBuilder newBuilder(op->getContext()); + newBuilder.setListener(&tracker); + newBuilder.setInsertionPointAfter(op); + + // Use provided value if available, otherwise create a constant + Value valueToStore = providedValue; + if (!valueToStore) { + // Create a test value to store - use a constant matching the element type + Type elementType = pointerType.getElementType(); + if (!elementType) { + llvm::errs() << "Failed to generate store for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + return; + } + + if (elementType.isIntOrIndex()) { + auto attr = newBuilder.getIntegerAttr(elementType, 42); + valueToStore = + arith::ConstantOp::create(newBuilder, loc, elementType, attr); + } else if (auto floatType = dyn_cast(elementType)) { + auto attr = newBuilder.getFloatAttr(floatType, 42.0); + valueToStore = + arith::ConstantOp::create(newBuilder, loc, floatType, 
attr); + } else { + llvm::errs() << "Failed to generate store for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + return; + } + } + + // Call the genStore API + auto typedResult = cast>(result); + bool success = + pointerType.genStore(newBuilder, loc, valueToStore, typedResult); + + if (success) { + llvm::errs() << "Successfully generated store for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + + // Print all operations that were inserted + for (Operation *insertedOp : tracker.insertedOps) { + llvm::errs() << "\tGenerated: "; + insertedOp->print(llvm::errs()); + llvm::errs() << "\n"; + } + } else { + llvm::errs() << "Failed to generate store for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + } +} + } // namespace //===----------------------------------------------------------------------===// diff --git a/revert_patches.txt b/revert_patches.txt index f4ec0a3444c46..9e465ba90ae6a 100644 --- a/revert_patches.txt +++ b/revert_patches.txt @@ -5,6 +5,3 @@ d57230c7 [AMDGPU][MC] Disallow op_sel in some VOP3P dot instructions (#100485) breaks build of ROCmValidationSuite [C2y] Support WG14 N3457, the __COUNTER__ macro (#162662) --- -breaks rocRAND -[CUDA][HIP] Fix CTAD for host/device constructors (#168711) ---- diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel index da39e58ac70ed..7dc16674b979f 100644 --- a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel @@ -2091,6 +2091,7 @@ cc_library( "//lldb:Target", "//lldb:TargetHeaders", "//lldb:Utility", + "//llvm:Support", ], ) @@ -2142,11 +2143,14 @@ cc_library( ":PluginObjectFilePlaceholder", ":PluginProcessUtility", "//lldb:Core", + "//lldb:CoreHeaders", "//lldb:Host", "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", "//lldb:Target", "//lldb:TargetHeaders", "//lldb:Utility", + 
"//llvm:Support", ], ) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 1428299076fb3..8e9b51b58f4f5 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -1243,42 +1243,12 @@ cc_library( ], ) -AnalysisFpExcSrcs = [ - "lib/Analysis/ConstantFolding.cpp", -] - -cc_library( - name = "AnalysisFpExc", - srcs = AnalysisFpExcSrcs, - hdrs = glob( - [ - "include/llvm/Analysis/*.h", - "include/llvm/Analysis/Utils/*.h", - ], - ), - copts = llvm_copts + ["-ftrapping-math"], - textual_hdrs = glob([ - "include/llvm/Analysis/*.def", - ]), - deps = [ - ":BinaryFormat", - ":Core", - ":Object", - ":ProfileData", - ":Support", - ":TargetParser", - ":config", - ":target_library_info_gen", - ], -) - cc_library( name = "Analysis", srcs = glob( [ "lib/Analysis/*.cpp", ], - exclude = AnalysisFpExcSrcs, ), hdrs = glob( [ @@ -1288,12 +1258,11 @@ cc_library( ) + [ "include/llvm-c/Analysis.h", ], - copts = llvm_copts, + copts = llvm_copts + ["-ftrapping-math"], textual_hdrs = glob([ "include/llvm/Analysis/*.def", ]), deps = [ - ":AnalysisFpExc", ":BinaryFormat", ":Core", ":FrontendHLSL",