@@ -635,7 +635,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
635
635
break ;
636
636
637
637
auto IID = SrcCI->getIntrinsicID ();
638
- // llvm.amdgcn.rcp(llvm.amdgcn.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable
638
+ // llvm.amdgcn.rcp(llvm.amdgcn.sqrt(x)) -> llvm.amdgcn.rsq(x) if
639
+ // contractable
639
640
//
640
641
// llvm.amdgcn.rcp(llvm.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable and
641
642
// relaxed.
@@ -845,13 +846,13 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
845
846
break ;
846
847
}
847
848
case Intrinsic::amdgcn_cvt_off_f32_i4: {
848
- Value* Arg = II.getArgOperand (0 );
849
+ Value * Arg = II.getArgOperand (0 );
849
850
Type *Ty = II.getType ();
850
851
851
852
if (isa<PoisonValue>(Arg))
852
853
return IC.replaceInstUsesWith (II, PoisonValue::get (Ty));
853
854
854
- if (IC.getSimplifyQuery ().isUndefValue (Arg))
855
+ if (IC.getSimplifyQuery ().isUndefValue (Arg))
855
856
return IC.replaceInstUsesWith (II, Constant::getNullValue (Ty));
856
857
857
858
ConstantInt *CArg = dyn_cast<ConstantInt>(II.getArgOperand (0 ));
@@ -1629,18 +1630,18 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
1629
1630
}
1630
1631
}
1631
1632
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1632
- AMDGPU::getImageDimIntrinsicInfo (II.getIntrinsicID ())) {
1633
+ AMDGPU::getImageDimIntrinsicInfo (II.getIntrinsicID ())) {
1633
1634
return simplifyAMDGCNImageIntrinsic (ST, ImageDimIntr, II, IC);
1634
1635
}
1635
1636
return std::nullopt;
1636
1637
}
1637
1638
1638
1639
// / Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
1639
1640
// /
1640
- // / The result of simplifying amdgcn image and buffer store intrinsics is updating
1641
- // / definitions of the intrinsics vector argument, not Uses of the result like
1642
- // / image and buffer loads.
1643
- // / Note: This only supports non-TFE/LWE image intrinsic calls; those have
1641
+ // / The result of simplifying amdgcn image and buffer store intrinsics is
1642
+ // / updating definitions of the intrinsic's vector argument, not Uses of the
1643
+ // / result like image and buffer loads. Note: This only supports non-TFE/LWE
1644
+ // / image intrinsic calls; TFE/LWE calls have
1644
1645
// / struct returns.
1645
1646
static Value *simplifyAMDGCNMemoryIntrinsicDemanded (InstCombiner &IC,
1646
1647
IntrinsicInst &II,
@@ -1837,7 +1838,12 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
1837
1838
Value *Extract = IC.Builder .CreateExtractElement (Src, FirstElt);
1838
1839
1839
1840
// TODO: Preserve callsite attributes?
1840
- CallInst *NewCall = IC.Builder .CreateCall (Remangled, {Extract}, OpBundles);
1841
+ SmallVector<Value *> Args{Extract};
1842
+ if (II.arg_size () > 1 ) {
1843
+ // Forward every operand after the first unchanged; index by I so each
+ // trailing operand is copied once rather than repeating operand 1.
+ for (unsigned I = 1 ; I < II.arg_size (); ++I)
1844
+ Args.push_back (II.getArgOperand (I));
1845
+ }
1846
+ CallInst *NewCall = IC.Builder .CreateCall (Remangled, Args, OpBundles);
1841
1847
1842
1848
return IC.Builder .CreateInsertElement (PoisonValue::get (II.getType ()),
1843
1849
NewCall, FirstElt);
@@ -1872,6 +1878,7 @@ std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
1872
1878
SimplifyAndSetOp) const {
1873
1879
switch (II.getIntrinsicID ()) {
1874
1880
case Intrinsic::amdgcn_readfirstlane:
1881
+ case Intrinsic::amdgcn_readlane:
1875
1882
SimplifyAndSetOp (&II, 0 , DemandedElts, UndefElts);
1876
1883
return simplifyAMDGCNLaneIntrinsicDemanded (IC, II, DemandedElts, UndefElts);
1877
1884
case Intrinsic::amdgcn_raw_buffer_load:
0 commit comments