-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DirectX] Start the creation of a DXIL Instruction legalizer #131221
Conversation
farzonl
commented
Mar 13, 2025
- Legalize i8 truncation back to original types
- remove sext and truncs
- Legalize i64 indicies for insert\extract elements to i32 indicies
- fixes [DX] LLVM Optimizations may generate invalid integer types #126323
- fixes [DirectX] Re-evaluate pass ordering for producing correct DXIL module flags #129757
@llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) Changes
Patch is 21.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131221.diff 11 Files Affected:
diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt
index 6904a1c0f1e73..13f8adbe4f132 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -32,6 +32,7 @@ add_llvm_target(DirectXCodeGen
DXILShaderFlags.cpp
DXILTranslateMetadata.cpp
DXILRootSignature.cpp
+ DXILLegalizePass.cpp
LINK_COMPONENTS
Analysis
diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
new file mode 100644
index 0000000000000..934a6e75ad844
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -0,0 +1,198 @@
+//===- DXILLegalizePass.cpp - Legalizes llvm IR for DXIL-*- C++----------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
+///
+/// \file This file contains a pass to remove i8 truncations and i64 extract
+/// and insert elements.
+///
+//===----------------------------------------------------------------------===//
+#include "DXILLegalizePass.h"
+#include "DirectX.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <functional>
+#include <map>
+#include <stack>
+#include <vector>
+
+#define DEBUG_TYPE "dxil-legalize"
+
+using namespace llvm;
+namespace {
+
+static bool fixI8TruncUseChain(Instruction &I,
+ std::stack<Instruction *> &ToRemove,
+ std::map<Value *, Value *> &ReplacedValues) {
+
+ if (auto *Trunc = dyn_cast<TruncInst>(&I)) {
+ if (Trunc->getDestTy()->isIntegerTy(8)) {
+ ReplacedValues[Trunc] = Trunc->getOperand(0);
+ ToRemove.push(Trunc);
+ }
+ } else if (I.getType()->isIntegerTy(8)) {
+ IRBuilder<> Builder(&I);
+
+ std::vector<Value *> NewOperands;
+ Type *InstrType = nullptr;
+ for (unsigned OpIdx = 0; OpIdx < I.getNumOperands(); ++OpIdx) {
+ Value *Op = I.getOperand(OpIdx);
+ if (ReplacedValues.count(Op)) {
+ InstrType = ReplacedValues[Op]->getType();
+ NewOperands.push_back(ReplacedValues[Op]);
+ } else if (auto *Imm = dyn_cast<ConstantInt>(Op)) {
+ APInt Value = Imm->getValue();
+ unsigned NewBitWidth = InstrType->getIntegerBitWidth();
+ // Note: options here are sext or sextOrTrunc.
+ // Since i8 isn't suppport we assume new values
+ // will always have a higher bitness.
+ APInt NewValue = Value.sext(NewBitWidth);
+ NewOperands.push_back(ConstantInt::get(InstrType, NewValue));
+ } else {
+ assert(!Op->getType()->isIntegerTy(8));
+ NewOperands.push_back(Op);
+ }
+ }
+
+ Value *NewInst = nullptr;
+ if (auto *BO = dyn_cast<BinaryOperator>(&I))
+ NewInst =
+ Builder.CreateBinOp(BO->getOpcode(), NewOperands[0], NewOperands[1]);
+ else if (auto *Cmp = dyn_cast<CmpInst>(&I))
+ NewInst = Builder.CreateCmp(Cmp->getPredicate(), NewOperands[0],
+ NewOperands[1]);
+ else if (auto *Cast = dyn_cast<CastInst>(&I))
+ NewInst = Builder.CreateCast(Cast->getOpcode(), NewOperands[0],
+ Cast->getDestTy());
+ else if (auto *UnaryOp = dyn_cast<UnaryOperator>(&I))
+ NewInst = Builder.CreateUnOp(UnaryOp->getOpcode(), NewOperands[0]);
+
+ if (NewInst) {
+ ReplacedValues[&I] = NewInst;
+ ToRemove.push(&I);
+ }
+ } else if (auto *Sext = dyn_cast<SExtInst>(&I)) {
+ if (Sext->getSrcTy()->isIntegerTy(8)) {
+ ToRemove.push(Sext);
+ Sext->replaceAllUsesWith(ReplacedValues[Sext->getOperand(0)]);
+ }
+ }
+
+ return !ToRemove.empty();
+}
+
+static bool downcastI64toI32InsertExtractElements(
+ Instruction &I, std::stack<Instruction *> &ToRemove, std::map<Value *, Value *> &) {
+
+ if (auto *Extract = dyn_cast<ExtractElementInst>(&I)) {
+ Value *Idx = Extract->getIndexOperand();
+ auto *CI = dyn_cast<ConstantInt>(Idx);
+ if (CI && CI->getBitWidth() == 64) {
+ IRBuilder<> Builder(Extract);
+ int64_t IndexValue = CI->getSExtValue();
+ auto *Idx32 =
+ ConstantInt::get(Type::getInt32Ty(I.getContext()), IndexValue);
+ Value *NewExtract =
+ Builder.CreateExtractElement(Extract->getVectorOperand(), Idx32,Extract->getName());
+
+ Extract->replaceAllUsesWith(NewExtract);
+ ToRemove.push(Extract);
+ }
+ }
+
+ if (auto *Insert = dyn_cast<InsertElementInst>(&I)) {
+ Value *Idx = Insert->getOperand(2);
+ auto *CI = dyn_cast<ConstantInt>(Idx);
+ if (CI && CI->getBitWidth() == 64) {
+ int64_t IndexValue = CI->getSExtValue();
+ auto *Idx32 =
+ ConstantInt::get(Type::getInt32Ty(I.getContext()), IndexValue);
+ IRBuilder<> Builder(Insert);
+ Value *Insert32Index = Builder.CreateInsertElement(
+ Insert->getOperand(0), Insert->getOperand(1), Idx32, Insert->getName());
+
+ Insert->replaceAllUsesWith(Insert32Index);
+ ToRemove.push(Insert);
+ }
+ }
+
+ return !ToRemove.empty();
+}
+
+class DXILLegalizationPipeline {
+
+public:
+ DXILLegalizationPipeline() { initializeLegalizationPipeline(); }
+
+ bool runLegalizationPipeline(Function &F) {
+ std::stack<Instruction *> ToRemove;
+ std::map<Value *, Value *> ReplacedValues;
+ bool MadeChanges = false;
+ for (auto &I : instructions(F)) {
+ for (auto &LegalizationFn : LegalizationPipeline) {
+ MadeChanges = LegalizationFn(I, ToRemove, ReplacedValues);
+ }
+ }
+ while (!ToRemove.empty()) {
+ Instruction *I = ToRemove.top();
+ I->eraseFromParent();
+ ToRemove.pop();
+ }
+
+ return MadeChanges;
+ }
+
+private:
+ std::vector<std::function<bool(Instruction &, std::stack<Instruction *> &,
+ std::map<Value *, Value *> &)>>
+ LegalizationPipeline;
+
+ void initializeLegalizationPipeline() {
+ LegalizationPipeline.push_back(fixI8TruncUseChain);
+ LegalizationPipeline.push_back(downcastI64toI32InsertExtractElements);
+ }
+};
+
+class DXILLegalizeLegacy : public FunctionPass {
+
+public:
+ bool runOnFunction(Function &F) override;
+ DXILLegalizeLegacy() : FunctionPass(ID) {}
+
+ static char ID; // Pass identification.
+};
+} // namespace
+
+PreservedAnalyses DXILLegalizePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ DXILLegalizationPipeline DXLegalize;
+ bool MadeChanges = DXLegalize.runLegalizationPipeline(F);
+ if (!MadeChanges)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ return PA;
+}
+
+bool DXILLegalizeLegacy::runOnFunction(Function &F) {
+ DXILLegalizationPipeline DXLegalize;
+ return DXLegalize.runLegalizationPipeline(F);
+}
+
+char DXILLegalizeLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DXILLegalizeLegacy, DEBUG_TYPE, "DXIL Legalizer", false,
+ false)
+INITIALIZE_PASS_END(DXILLegalizeLegacy, DEBUG_TYPE, "DXIL Legalizer", false,
+ false)
+
+FunctionPass *llvm::createDXILLegalizeLegacyPass() {
+ return new DXILLegalizeLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.h b/llvm/lib/Target/DirectX/DXILLegalizePass.h
new file mode 100644
index 0000000000000..39ef6f532dca0
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.h
@@ -0,0 +1,22 @@
+//===- DXILLegalizePass.h - Legalizes llvm IR for DXIL-*- C++------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_DIRECTX_LEGALIZE_H
+#define LLVM_TARGET_DIRECTX_LEGALIZE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class DXILLegalizePass : public PassInfoMixin<DXILLegalizePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_DIRECTX_LEGALIZE_H
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
index 42aa0da16e8aa..96a8a08c875f8 100644
--- a/llvm/lib/Target/DirectX/DirectX.h
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -47,6 +47,13 @@ void initializeDXILFlattenArraysLegacyPass(PassRegistry &);
/// Pass to flatten arrays into a one dimensional DXIL legal form
ModulePass *createDXILFlattenArraysLegacyPass();
+/// Initializer DXIL legalizationPass
+void initializeDXILLegalizeLegacyPass(PassRegistry &);
+
+/// Pass to Legalize DXIL by remove i8 truncations and i64 insert/extract
+/// elements
+FunctionPass *createDXILLegalizeLegacyPass();
+
/// Initializer for DXILOpLowering
void initializeDXILOpLoweringLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/DirectX/DirectXPassRegistry.def b/llvm/lib/Target/DirectX/DirectXPassRegistry.def
index aee0a4ff83d43..87d91ead1896f 100644
--- a/llvm/lib/Target/DirectX/DirectXPassRegistry.def
+++ b/llvm/lib/Target/DirectX/DirectXPassRegistry.def
@@ -38,4 +38,5 @@ MODULE_PASS("print<dxil-root-signature>", dxil::RootSignatureAnalysisPrinter(dbg
#define FUNCTION_PASS(NAME, CREATE_PASS)
#endif
FUNCTION_PASS("dxil-resource-access", DXILResourceAccess())
+FUNCTION_PASS("dxil-legalize", DXILLegalizePass())
#undef FUNCTION_PASS
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 82dc1c6af562a..ce408b4034f83 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -15,6 +15,7 @@
#include "DXILDataScalarization.h"
#include "DXILFlattenArrays.h"
#include "DXILIntrinsicExpansion.h"
+#include "DXILLegalizePass.h"
#include "DXILOpLowering.h"
#include "DXILPrettyPrinter.h"
#include "DXILResourceAccess.h"
@@ -52,6 +53,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
initializeDXILDataScalarizationLegacyPass(*PR);
initializeDXILFlattenArraysLegacyPass(*PR);
initializeScalarizerLegacyPassPass(*PR);
+ initializeDXILLegalizeLegacyPass(*PR);
initializeDXILPrepareModulePass(*PR);
initializeEmbedDXILPassPass(*PR);
initializeWriteDXILPassPass(*PR);
@@ -99,6 +101,7 @@ class DirectXPassConfig : public TargetPassConfig {
ScalarizerPassOptions DxilScalarOptions;
DxilScalarOptions.ScalarizeLoadStore = true;
addPass(createScalarizerPass(DxilScalarOptions));
+ addPass(createDXILLegalizeLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILPrepareModulePass());
diff --git a/llvm/test/CodeGen/DirectX/legalize-i64-extract-insert-elements.ll b/llvm/test/CodeGen/DirectX/legalize-i64-extract-insert-elements.ll
new file mode 100644
index 0000000000000..8a59986524c90
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/legalize-i64-extract-insert-elements.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -passes='dxil-legalize' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+define noundef <4 x float> @float4_extract(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i32 0
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i32 1
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i32 2
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i32 3
+ ; CHECK: insertelement <4 x float> poison, float [[ee0]], i32 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ee1]], i32 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ee2]], i32 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ee3]], i32 3
+
+ %a.i0 = extractelement <4 x float> %a, i64 0
+ %a.i1 = extractelement <4 x float> %a, i64 1
+ %a.i2 = extractelement <4 x float> %a, i64 2
+ %a.i3 = extractelement <4 x float> %a, i64 3
+
+ %.upto0 = insertelement <4 x float> poison, float %a.i0, i64 0
+ %.upto1 = insertelement <4 x float> %.upto0, float %a.i1, i64 1
+ %.upto2 = insertelement <4 x float> %.upto1, float %a.i2, i64 2
+ %0 = insertelement <4 x float> %.upto2, float %a.i3, i64 3
+ ret <4 x float> %0
+}
diff --git a/llvm/test/CodeGen/DirectX/legalize-i8.ll b/llvm/test/CodeGen/DirectX/legalize-i8.ll
new file mode 100644
index 0000000000000..d7cea585a056b
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/legalize-i8.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -passes='dxil-legalize' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+define i32 @i8trunc(float %0) #0 {
+ ; CHECK-NOT: %4 = trunc nsw i32 %3 to i8
+ ; CHECK: add i32
+ ; CHECK-NEXT: srem i32
+ ; CHECK-NEXT: sub i32
+ ; CHECK-NEXT: mul i32
+ ; CHECK-NEXT: udiv i32
+ ; CHECK-NEXT: sdiv i32
+ ; CHECK-NEXT: urem i32
+ ; CHECK-NEXT: and i32
+ ; CHECK-NEXT: or i32
+ ; CHECK-NEXT: xor i32
+ ; CHECK-NEXT: shl i32
+ ; CHECK-NEXT: lshr i32
+ ; CHECK-NEXT: ashr i32
+ ; CHECK-NOT: %7 = sext i8 %6 to i32
+
+ %2 = fptosi float %0 to i32
+ %3 = srem i32 %2, 8
+ %4 = trunc nsw i32 %3 to i8
+ %5 = add nsw i8 %4, 1
+ %6 = srem i8 %5, 8
+ %7 = sub i8 %6, 1
+ %8 = mul i8 %7, 1
+ %9 = udiv i8 %8, 1
+ %10 = sdiv i8 %9, 1
+ %11 = urem i8 %10, 1
+ %12 = and i8 %11, 1
+ %13 = or i8 %12, 1
+ %14 = xor i8 %13, 1
+ %15 = shl i8 %14, 1
+ %16 = lshr i8 %15, 1
+ %17 = ashr i8 %16, 1
+ %18 = sext i8 %17 to i32
+ ret i32 %18
+}
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 3a9af4d744f98..ee70cec534bc5 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -21,6 +21,7 @@
; CHECK-NEXT: DXIL Resource Access
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Scalarize vector operations
+; CHECK-NEXT: DXIL Legalizer
; CHECK-NEXT: DXIL Resource Binding Analysis
; CHECK-NEXT: DXIL Module Metadata analysis
; CHECK-NEXT: DXIL Shader Flag Analysis
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 4e522c6ef5da7..7e5a92e1311f8 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -44,10 +44,10 @@ define <4 x i32> @load_array_vec_test() #0 {
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
-; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i64 0
-; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i64 1
-; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i64 2
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i64 3
+; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i32 0
+; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i32 1
+; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i32 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i32 3
; CHECK-NEXT: ret <4 x i32> [[TMP16]]
;
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
@@ -68,10 +68,10 @@ define <4 x i32> @load_vec_test() #0 {
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3) to ptr addrspace(3)
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
-; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP4]], i64 1
-; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP6]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[TMP8]], i64 3
+; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP4]], i32 1
+; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP6]], i32 2
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[TMP8]], i32 3
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
;
%1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
@@ -93,10 +93,10 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
; CHECK-NEXT: [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[TMP5]], i32 3
; CHECK-NEXT: [[DOTI3:%.*]] = load i32, ptr [[DOTFLAT_I3]], align 4
-; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
-; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i64 1
-; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[DOTI2]], i64 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[DOTI3]], i64 3
+; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i32 1
+; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[DOTI2]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[DOTI3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
%3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
@@ -127,10 +127,10 @@ define <4 x i32> @multid_load_test() #0 {
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
-; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i64 0
-; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i64 1
-; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i64 2
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i64 3
+; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i32 0
+; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i32 1
+; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i32 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i32 3
; CHECK-NEXT: ret <4 x i32> [[TMP16]]
;
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
diff --git a/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll
index 0546a5505416f..7e8f58c0576f0 100644
--- a/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll
+++ b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll
@@ -3,22 +3,22 @@
; CHECK: target triple = "dxilv1.3-pc-shadermodel6.3-library"
; CHECK-LABEL: cos_sin_float_test
define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) #0 {
- ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i32 0
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]])
- ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i32 1
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]])
- ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
9fd631a
to
dd6fd3c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good aside from the nits I had on the code comments.
I also rebased my int64ops branch to this PR and found that it does properly fix the dxil validation issues.
…we can reduce i64 insert/extracts to i32
…the first argument
//===---------------------------------------------------------------------===// | ||
/// | ||
/// \file This file contains a pass to remove i8 truncations and i64 extract | ||
/// and insert elements. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this the only set of legalizations you expect this pass to handle? If not, this will probably get out of date quickly.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===---------------------------------------------------------------------===// | ||
//===---------------------------------------------------------------------===// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Doubled heading line is unncessary.
(Cmp && Cmp->getOperand(0)->getType()->isIntegerTy(8))) { | ||
IRBuilder<> Builder(&I); | ||
|
||
std::vector<Value *> NewOperands; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use llvm::SmallVector to avoid dynamic allocations.
namespace { | ||
|
||
static void fixI8TruncUseChain(Instruction &I, | ||
std::stack<Instruction *> &ToRemove, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
llvm::SmallVector and llvm::SmallVectorImpl should be preferred over std::stack.
while (!ToRemove.empty()) { | ||
Instruction *I = ToRemove.top(); | ||
I->eraseFromParent(); | ||
ToRemove.pop(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't really need to be using a stack here right? Isn't it reasonable to just iterate and erase?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do need a stack because most the i8 cases can’t use replaceAllUsesWith because the instruction types are different. My work around is to remove starting from the last instruction we saw so that there are no uses when we call eraseFromParent
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't need a stack for that. You can reverse iterate a SmallVector: for (auto *Inst : reverse(ToRemove))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So the list size isn't huge so this probably doesn't matter, but reverse would be applying std::iter_swap to every pair of iterators thats O(n) + O(n) for erasing vs just O(n). I'm guessing the SmallVector
benefits outweigh this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
llvm::reverse
creates a range iterator with reverse iterators, not std::iter_swap
, so it is O(n):
https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/ADT/STLExtras.h#L428
for (auto &I : instructions(F)) { | ||
for (auto &LegalizationFn : LegalizationPipeline) { | ||
LegalizationFn(I, ToRemove, ReplacedValues); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
|
||
static void fixI8TruncUseChain(Instruction &I, | ||
std::stack<Instruction *> &ToRemove, | ||
std::map<Value *, Value *> &ReplacedValues) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
llvm::DenseMap should almost always be used instead of std::map.
see: https://llvm.org/docs/CodingStandards.html#c-standard-library
#define DEBUG_TYPE "dxil-legalize" | ||
|
||
using namespace llvm; | ||
namespace { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: don't include static
functions in the anonymous namespace.
Because of this, we have a simple guideline: make anonymous namespaces as small as possible, and only use them for class declarations.
see: https://llvm.org/docs/CodingStandards.html#restrict-visibility
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/70/builds/7750 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/169/builds/9526 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/146/builds/2515 Here is the relevant piece of the build log for the reference
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A fairly minor style comment and a couple places we could probably add asserts that are probably worth doing if you're planning on more updates in the area.
using namespace llvm; | ||
namespace { | ||
|
||
static void fixI8TruncUseChain(Instruction &I, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find the nesting in this function hard to follow, especially since the longest case is in the middle. Since all of the instruction types are mutually exclusive, do you think it would read better to structure it like:
if (auto *Trunc = dyn_cast<TruncInst>(&I)) {
// ...
return;
}
if (auto *Cast = dyn_cast<CastInst>(&I)) {
// ...
return;
}
auto *Cmp = dyn_cast<CmpInst>(&I);
if (!I.getType()->isIntegerTy(8) &&
!(Cmp && Cmp->getOperand(0)->getType()->isIntegerTy(8)))
// Nothing to do...
return;
// ...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think i have something cleaner let me know in the new pr.
if (ReplacedValues.count(Op)) | ||
InstrType = ReplacedValues[Op]->getType(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to get conflicting InstrTypes here? Does this deserve an assert?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I want to say no, but not sure what you mean. I'll just explain what I was thinking here and maybe that will answer your question.
So I'm setting the default instruction type to i32 that happens on line 48. That means if we ever have an instruction and all its operands are imm 8 bits we would sext them all to 32. What the loop on line 49 does is to look at the operands and see if we have created any replacement types for them. these typically end up being the values before a trunc operation. Now we are able to set the instruction type. This is really only important for BinaryOperator instructions.
You can see the above explanations exercised in the i16_test
and the all_imm
of llvm/test/CodeGen/DirectX/legalize-i8.ll
.
// Since i8 isn't supported, we assume new values | ||
// will always have a higher bitness. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
An assert to document/enforce the assumption wouldn't hurt
- [x] [Don't include static inside anonymous namespace](#131221 (comment)) - [x] [Use DenseMap](#131221 (comment)) - [x] [remove {}](#131221 (comment)) - [x] [remove std::stack with llvm::reverse of SmallVector](#131221 (comment)) - [x] [replace std::vector with llvm::SmallVector](#131221 (comment)) - [x] [Remove legalize comment block](#131221 (comment)) and [double comment block](#131221 (comment)) - [x] [Refactor fixI8TruncUseChain to remove nesting](#131221 (comment)) - [x] [add asserts on assumptions](#131221 (comment))
- [x] [Don't include static inside anonymous namespace](llvm/llvm-project#131221 (comment)) - [x] [Use DenseMap](llvm/llvm-project#131221 (comment)) - [x] [remove {}](llvm/llvm-project#131221 (comment)) - [x] [remove std::stack with llvm::reverse of SmallVector](llvm/llvm-project#131221 (comment)) - [x] [replace std::vector with llvm::SmallVector](llvm/llvm-project#131221 (comment)) - [x] [Remove legalize comment block](llvm/llvm-project#131221 (comment)) and [double comment block](llvm/llvm-project#131221 (comment)) - [x] [Refactor fixI8TruncUseChain to remove nesting](llvm/llvm-project#131221 (comment)) - [x] [add asserts on assumptions](llvm/llvm-project#131221 (comment))