Skip to content

Commit 5136886

Browse files
committed
implement @bogner suggestions
1 parent f37e410 commit 5136886

File tree

3 files changed

+56
-49
lines changed

3 files changed

+56
-49
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -541,10 +541,12 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
541541
return Builder.CreateFMul(X, PiOver180);
542542
}
543543

544-
static Value *expandTypedBufferLoadIntrinsic(CallInst *Orig) {
544+
static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
545545
IRBuilder<> Builder(Orig);
546546

547547
Type *BufferTy = Orig->getType()->getStructElementType(0);
548+
assert(BufferTy->getScalarType()->isDoubleTy() &&
549+
"Only expand double or double2");
548550

549551
unsigned ExtractNum = 2;
550552
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
@@ -566,7 +568,7 @@ static Value *expandTypedBufferLoadIntrinsic(CallInst *Orig) {
566568
SmallVector<Value *> ExtractElements;
567569
for (unsigned I = 0; I < ExtractNum; ++I)
568570
ExtractElements.push_back(
569-
Builder.CreateExtractElement(Extract, (uint64_t)I));
571+
Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
570572

571573
// combine into double(s)
572574
Value *Result = PoisonValue::get(BufferTy);
@@ -575,23 +577,40 @@ static Value *expandTypedBufferLoadIntrinsic(CallInst *Orig) {
575577
Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
576578
{ExtractElements[I], ExtractElements[I + 1]});
577579
if (ExtractNum == 4)
578-
Result = Builder.CreateInsertElement(Result, Dbl, (uint64_t)I / 2);
580+
Result =
581+
Builder.CreateInsertElement(Result, Dbl, Builder.getInt32(I / 2));
579582
else
580583
Result = Dbl;
581584
}
582585

583-
Value *CheckBit = Builder.CreateExtractValue(Load, {1});
584-
585-
Value *Struct = PoisonValue::get(Orig->getType());
586-
Struct = Builder.CreateInsertValue(Struct, Result, {0});
587-
Struct = Builder.CreateInsertValue(Struct, CheckBit, {1});
588-
return Struct;
586+
Value *CheckBit = nullptr;
587+
for (User *U : make_early_inc_range(Orig->users())) {
588+
auto *EVI = cast<ExtractValueInst>(U);
589+
ArrayRef<unsigned> Indices = EVI->getIndices();
590+
assert(Indices.size() == 1);
591+
592+
if (Indices[0] == 0) {
593+
// Use of the value(s)
594+
EVI->replaceAllUsesWith(Result);
595+
} else {
596+
// Use of the check bit
597+
assert(Indices[0] == 1 && "Unexpected type for typedbufferload");
598+
if (!CheckBit)
599+
CheckBit = Builder.CreateExtractValue(Load, {1});
600+
EVI->replaceAllUsesWith(CheckBit);
601+
}
602+
EVI->eraseFromParent();
603+
}
604+
Orig->eraseFromParent();
605+
return true;
589606
}
590607

591-
static Value *expandTypedBufferStoreIntrinsic(CallInst *Orig) {
608+
static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
592609
IRBuilder<> Builder(Orig);
593610

594611
Type *BufferTy = Orig->getFunctionType()->getParamType(2);
612+
assert(BufferTy->getScalarType()->isDoubleTy() &&
613+
"Only expand double or double2");
595614

596615
unsigned ExtractNum = 2;
597616
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
@@ -614,14 +633,16 @@ static Value *expandTypedBufferStoreIntrinsic(CallInst *Orig) {
614633
Value *Val;
615634
if (ExtractNum == 2) {
616635
Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
617-
Val = Builder.CreateInsertElement(Val, LowBits, (uint64_t)0);
618-
Val = Builder.CreateInsertElement(Val, HighBits, 1);
636+
Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
637+
Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
619638
} else
620639
Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
621640

622-
return Builder.CreateIntrinsic(
623-
Builder.getVoidTy(), Intrinsic::dx_resource_store_typedbuffer,
624-
{Orig->getOperand(0), Orig->getOperand(1), Val});
641+
Builder.CreateIntrinsic(Builder.getVoidTy(),
642+
Intrinsic::dx_resource_store_typedbuffer,
643+
{Orig->getOperand(0), Orig->getOperand(1), Val});
644+
Orig->eraseFromParent();
645+
return true;
625646
}
626647

627648
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
@@ -753,10 +774,12 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
753774
Result = expandRadiansIntrinsic(Orig);
754775
break;
755776
case Intrinsic::dx_resource_load_typedbuffer:
756-
Result = expandTypedBufferLoadIntrinsic(Orig);
777+
if (expandTypedBufferLoadIntrinsic(Orig))
778+
return true;
757779
break;
758780
case Intrinsic::dx_resource_store_typedbuffer:
759-
Result = expandTypedBufferStoreIntrinsic(Orig);
781+
if (expandTypedBufferStoreIntrinsic(Orig))
782+
return true;
760783
break;
761784
case Intrinsic::usub_sat:
762785
Result = expandUsubSat(Orig);

llvm/test/CodeGen/DirectX/BufferLoadDouble.ll

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@ define void @loadf64() {
1111
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
1212
i32 0, i32 1, i32 1, i32 0, i1 false)
1313

14-
ret void
15-
1614
; check we load a <2 x i32> instead of a double
1715
; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.typedbuffer
1816
; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
@@ -23,14 +21,10 @@ define void @loadf64() {
2321

2422
; check we extract the two i32 and construct a double
2523
; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
26-
; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
27-
; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
24+
; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0
25+
; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1
2826
; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
29-
; construct a new {double, i1}
30-
; CHECK: [[CB:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 1
31-
; CHECK: [[S1:%.*]] = insertvalue { double, i1 } poison, double [[DBL]], 0
32-
; CHECK: [[S2:%.*]] = insertvalue { double, i1 } [[S1]], i1 [[CB]], 1
33-
; CHECK: extractvalue { double, i1 } [[S2]], 0
27+
; CHECK-NOT: extractvalue { double, i1 }
3428
%data0 = extractvalue {double, i1} %load0, 0
3529
ret void
3630
}
@@ -53,19 +47,15 @@ define void @loadv2f64() {
5347

5448
; check we extract the 4 i32 and construct a <2 x double>
5549
; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0
56-
; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i64 0
57-
; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i64 1
58-
; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i64 2
59-
; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i64 3
50+
; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0
51+
; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1
52+
; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2
53+
; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3
6054
; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]])
61-
; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i64 0
55+
; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i32 0
6256
; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]])
63-
; CHECK: [[Vec2:%.*]] = insertelement <2 x double> [[Vec]], double [[Dbl2]], i64 1
64-
; construct a new {<2 x double>, i1}
65-
; CHECK: [[CB:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1
66-
; CHECK: [[S1:%.*]] = insertvalue { <2 x double>, i1 } poison, <2 x double> [[Vec2]], 0
67-
; CHECK: [[S2:%.*]] = insertvalue { <2 x double>, i1 } [[S1]], i1 [[CB]], 1
68-
; CHECK: extractvalue { <2 x double>, i1 } [[S2]], 0
57+
; CHECK: [[Vec2:%.*]] = insertelement <2 x double> [[Vec]], double [[Dbl2]], i32 1
58+
; CHECK-NOT: extractvalue { <2 x double>, i1 }
6959
%data0 = extractvalue { <2 x double>, i1 } %load0, 0
7060
ret void
7161
}
@@ -80,8 +70,6 @@ define void @loadf64WithCheckBit() {
8070
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
8171
i32 0, i32 1, i32 1, i32 0, i1 false)
8272

83-
ret void
84-
8573
; check we load an <2 x i32> instead of a double
8674
; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.typedbuffer
8775
; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
@@ -92,16 +80,12 @@ define void @loadf64WithCheckBit() {
9280

9381
; check we extract the two i32 and construct a double
9482
; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
95-
; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
96-
; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
83+
; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0
84+
; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1
9785
; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
98-
; construct a new {double, i1}
99-
; CHECK: [[CB:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 1
100-
; CHECK: [[S1:%.*]] = insertvalue { double, i1 } poison, double [[DBL]], 0
101-
; CHECK: [[S2:%.*]] = insertvalue { double, i1 } [[S1]], i1 [[CB]], 1
102-
; CHECK: extractvalue { double, i1 } [[S2]], 0
10386
%data0 = extractvalue {double, i1} %load0, 0
104-
; CHECK: extractvalue { double, i1 } [[S2]], 1
87+
; CHECK: extractvalue { <2 x i32>, i1 } [[L0]], 1
88+
; CHECK-NOT: extractvalue { double, i1 }
10589
%cb = extractvalue {double, i1} %load0, 1
10690
ret void
10791
}

llvm/test/CodeGen/DirectX/BufferStoreDouble.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ define void @storef64(double %0) {
1414
; CHECK: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0)
1515
; CHECK: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0
1616
; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
17-
; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i64 0
18-
; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i64 1
17+
; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0
18+
; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1
1919
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32(
2020
; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]])
2121
call void @llvm.dx.resource.store.typedbuffer(

0 commit comments

Comments
 (0)