-
Notifications
You must be signed in to change notification settings - Fork 14k
[Reland] Adjust bit cast instruction filter for DXIL Prepare pass #143783
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Reland] Adjust bit cast instruction filter for DXIL Prepare pass #143783
Conversation
@llvm/pr-subscribers-backend-directx Author: Joshua Batista (bob80905) ChangesRelands #142678 after a revert was needed due to unforeseen bugs. Full diff: https://github.com/llvm/llvm-project/pull/143783.diff 3 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index e0068787f5e5a..cb58f4833631d 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -148,9 +148,49 @@ class DXILPrepareModule : public ModulePass {
Type *Ty) {
// Omit bitcasts if the incoming value matches the instruction type.
auto It = PointerTypes.find(Operand);
- if (It != PointerTypes.end())
- if (cast<TypedPointerType>(It->second)->getElementType() == Ty)
+ if (It != PointerTypes.end()) {
+ auto *OpTy = cast<TypedPointerType>(It->second)->getElementType();
+ if (OpTy == Ty)
return nullptr;
+ }
+
+ Type *ValTy = Operand->getType();
+ // Also omit the bitcast for matching global array types
+ if (auto *GlobalVar = dyn_cast<GlobalVariable>(Operand))
+ ValTy = GlobalVar->getValueType();
+
+ if (auto *AI = dyn_cast<AllocaInst>(Operand))
+ ValTy = AI->getAllocatedType();
+
+ if (auto *ArrTy = dyn_cast<ArrayType>(ValTy)) {
+ Type *ElTy = ArrTy->getElementType();
+ if (ElTy == Ty)
+ return nullptr;
+ }
+
+ // finally, drill down GEP instructions until we get the array
+ // that is being accessed, and compare element types
+ if (ConstantExpr *GEPInstr = dyn_cast<ConstantExpr>(Operand)) {
+ while (GEPInstr->getOpcode() == Instruction::GetElementPtr) {
+ Value *OpArg = GEPInstr->getOperand(0);
+ if (ConstantExpr *NewGEPInstr = dyn_cast<ConstantExpr>(OpArg)) {
+ GEPInstr = NewGEPInstr;
+ continue;
+ }
+
+ if (auto *GlobalVar = dyn_cast<GlobalVariable>(OpArg))
+ ValTy = GlobalVar->getValueType();
+ if (auto *AI = dyn_cast<AllocaInst>(Operand))
+ ValTy = AI->getAllocatedType();
+ if (auto *ArrTy = dyn_cast<ArrayType>(ValTy)) {
+ Type *ElTy = ArrTy->getElementType();
+ if (ElTy == Ty)
+ return nullptr;
+ }
+ break;
+ }
+ }
+
// Insert bitcasts where we are removing the instruction.
Builder.SetInsertPoint(&Inst);
// This code only gets hit in opaque-pointer mode, so the type of the
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 7e5a92e1311f8..30ee555ca2dda 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -60,19 +60,15 @@ define <4 x i32> @load_array_vec_test() #0 {
define <4 x i32> @load_vec_test() #0 {
; CHECK-LABEL: define <4 x i32> @load_vec_test(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @vecData.scalarized to ptr addrspace(3)
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 1) to ptr addrspace(3)
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 2) to ptr addrspace(3)
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3) to ptr addrspace(3)
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
-; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP4]], i32 1
-; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP6]], i32 2
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[TMP8]], i32 3
-; CHECK-NEXT: ret <4 x i32> [[TMP9]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @vecData.scalarized, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 1), align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 2), align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3), align 4
+; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP3]], i32 2
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP5]]
;
%1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
ret <4 x i32> %1
@@ -107,31 +103,23 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
define <4 x i32> @multid_load_test() #0 {
; CHECK-LABEL: define <4 x i32> @multid_load_test(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3)
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3)
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 2) to ptr addrspace(3)
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3)
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3)
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
-; CHECK-NEXT: [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
-; CHECK-NEXT: [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
-; CHECK-NEXT: [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
-; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
-; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
-; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
-; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 2), align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3), align 4
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), align 4
+; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 1), align 4
+; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 2), align 4
+; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 3), align 4
+; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP2]], [[DOTI13]]
+; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP3]], [[DOTI25]]
+; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP4]], [[DOTI37]]
; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i32 0
; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i32 1
; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i32 2
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i32 3
-; CHECK-NEXT: ret <4 x i32> [[TMP16]]
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
diff --git a/llvm/test/CodeGen/DirectX/noop_bitcast_global_array_type.ll b/llvm/test/CodeGen/DirectX/noop_bitcast_global_array_type.ll
new file mode 100644
index 0000000000000..1f33700e014c7
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/noop_bitcast_global_array_type.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S --dxil-prepare %s | FileCheck %s
+
+; Test that global arrays do not get a bitcast instruction
+; after the dxil-prepare pass.
+
+target triple = "dxilv1.2-unknown-shadermodel6.2-compute"
+
+@inputTile.1dim = local_unnamed_addr addrspace(3) global [3 x float] zeroinitializer, align 2
+
+; CHECK-LABEL: testload
+define float @testload() local_unnamed_addr {
+ ; NOTE: this would be "bitcast ptr addrspace(3)..." before the change that introduced this test,
+ ; after the dxil-prepare pass is run
+ ; CHECK-NEXT: load float, ptr addrspace(3) @inputTile.1dim, align 2
+ %v = load float, ptr addrspace(3) @inputTile.1dim, align 2
+
+ ret float %v
+}
+
+; CHECK-LABEL: teststore
+define void @teststore() local_unnamed_addr {
+ ; CHECK-next: store float 2.000000e+00, ptr addrspace(3) @inputTile.1dim, align 2
+ store float 2.000000e+00, ptr addrspace(3) @inputTile.1dim, align 2
+
+ ret void
+}
+
+; CHECK-LABEL: testGEPConst
+define float @testGEPConst() local_unnamed_addr {
+ ; CHECK-NEXT: load float, ptr addrspace(3) getelementptr (float, ptr addrspace(3) @inputTile.1dim, i32 1), align 4
+ %v = load float, ptr addrspace(3) getelementptr (float, ptr addrspace(3) @inputTile.1dim, i32 1), align 4
+
+ ret float %v
+}
+
+; CHECK-LABEL: testGEPNonConst
+define float @testGEPNonConst(i32 %i) local_unnamed_addr {
+ ; CHECK-NEXT: getelementptr float, ptr addrspace(3) @inputTile.1dim, i32 %i
+ %gep = getelementptr float, ptr addrspace(3) @inputTile.1dim, i32 %i
+ %v = load float, ptr addrspace(3) %gep
+
+ ret float %v
+}
+
+; CHECK-LABEL: testAlloca
+define float @testAlloca(i32 %i) local_unnamed_addr {
+ ; CHECK-NEXT: alloca [3 x float], align 4
+ %arr = alloca [3 x float], align 4
+ ; CHECK-NEXT: getelementptr [3 x float], ptr %arr, i32 1
+ %gep = getelementptr [3 x float], ptr %arr, i32 1
+ %v = load float, ptr %gep
+ ret float %v
+}
|
634cfcc
to
7c7966d
Compare
This is great work Josh. I want you to know how huge your change is for getting DML tests passing these are the dxv validation errors before and afters
After:
All 586 bitcast errors are gone and unambiguous TGSM is down to just 24. So not only did you resolve your ticket this change resolved 98.82% of #140416! |
Relands #142678, with a new change to remove an unnecessary gep argument, after a revert was needed due to unforeseen bugs.
Fixes #139013