@@ -11664,7 +11664,7 @@ static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
 }
 
-static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
+static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
                                     ArrayRef<Value *> Ops, bool IsSigned) {
   unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
   llvm::Type *Ty = Ops[1]->getType();
@@ -11676,6 +11676,7 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
     Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
     Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
   } else {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
                    : CGF.Builder.CreateUIToFP(Ops[0], Ty);
   }
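
The pattern added throughout this change is scope-bound: CGFPOptionsRAII(CGF, E) installs the FP options attached to the call expression E (rounding mode, exception behavior, fast-math flags from pragmas) for exactly the lifetime of the enclosing block, so the CreateSIToFP/CreateUIToFP calls above pick them up. Below is a minimal sketch of the same save/restore idea expressed against the public IRBuilderBase API; it is an illustration of the discipline, not clang's actual CGFPOptionsRAII.

#include "llvm/IR/IRBuilder.h"

// Illustrative guard: capture the builder's fast-math flags on entry and
// restore them on scope exit, mirroring the save/restore discipline that
// CGFPOptionsRAII applies to clang's per-expression FP options.
class FPFlagsGuard {
  llvm::IRBuilderBase &Builder;
  llvm::FastMathFlags Saved; // state to reinstate in the destructor

public:
  FPFlagsGuard(llvm::IRBuilderBase &B, llvm::FastMathFlags PerCallFlags)
      : Builder(B), Saved(B.getFastMathFlags()) {
    Builder.setFastMathFlags(PerCallFlags); // apply the call-site flags
  }
  ~FPFlagsGuard() { Builder.setFastMathFlags(Saved); } // undo on any exit
};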
@@ -11684,8 +11685,9 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
 }
 
 // Lowers X86 FMA intrinsics to IR.
-static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
-                             unsigned BuiltinID, bool IsAddSub) {
+static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
+                             ArrayRef<Value *> Ops, unsigned BuiltinID,
+                             bool IsAddSub) {
 
   bool Subtract = false;
   Intrinsic::ID IID = Intrinsic::not_intrinsic;
@@ -11742,6 +11744,7 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
   llvm::Type *Ty = A->getType();
   Function *FMA;
   if (CGF.Builder.getIsFPConstrained()) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
     Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
   } else {
@@ -11783,10 +11786,10 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
   return Res;
 }
 
-static Value *
-EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
-                  Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
-                  bool NegAcc = false) {
+static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
+                                MutableArrayRef<Value *> Ops, Value *Upper,
+                                bool ZeroMask = false, unsigned PTIdx = 0,
+                                bool NegAcc = false) {
   unsigned Rnd = 4;
   if (Ops.size() > 4)
     Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
@@ -11805,6 +11808,7 @@ EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
     Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                  {Ops[0], Ops[1], Ops[2], Ops[4]});
   } else if (CGF.Builder.getIsFPConstrained()) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *FMA = CGF.CGM.getIntrinsic(
         Intrinsic::experimental_constrained_fma, Ops[0]->getType());
     Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
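
In the constrained branch, CreateConstrainedFPCall emits llvm.experimental.constrained.fma, whose rounding-mode and exception-behavior operands are filled in from the builder's current defaults; the RAII object is what makes those defaults reflect the options of this particular call. A standalone sketch of that shape using only the IRBuilder API follows (the helper name emitConstrainedFMA is ours, not clang's):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Sketch: emit llvm.experimental.constrained.fma for A * X + C. The
// constrained call carries rounding/exception metadata taken from the
// builder's current defaults, which is why they must be set per call site.
llvm::Value *emitConstrainedFMA(llvm::IRBuilderBase &B, llvm::Module &M,
                                llvm::Value *A, llvm::Value *X,
                                llvm::Value *C) {
  llvm::Function *FMA = llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::experimental_constrained_fma, A->getType());
  return B.CreateConstrainedFPCall(FMA, {A, X, C});
}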
@@ -12142,8 +12146,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   // TODO: The builtins could be removed if the SSE header files used vector
   // extension comparisons directly (vector ordered/unordered may need
   // additional support via __builtin_isnan()).
-  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
-                                      bool IsSignaling) {
+  auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
+                                         bool IsSignaling) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     Value *Cmp;
     if (IsSignaling)
       Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
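
The lambda distinguishes quiet and signaling compares: CreateFCmpS emits the signaling form (lowered to llvm.experimental.constrained.fcmps under strict FP, which raises the invalid exception even on quiet NaN operands), while CreateFCmp stays quiet. A reduced sketch of that dispatch, with emitVectorFCmp as our own illustrative name:

#include "llvm/IR/IRBuilder.h"

// Sketch: choose between a quiet and a signaling floating-point compare.
// Under an FP-constrained builder these become the constrained fcmp/fcmps
// intrinsics; otherwise they are plain fcmp instructions.
llvm::Value *emitVectorFCmp(llvm::IRBuilderBase &B,
                            llvm::CmpInst::Predicate Pred, llvm::Value *LHS,
                            llvm::Value *RHS, bool IsSignaling) {
  return IsSignaling ? B.CreateFCmpS(Pred, LHS, RHS)
                     : B.CreateFCmp(Pred, LHS, RHS);
}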
@@ -12385,31 +12390,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
-    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true);
+    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
-    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false);
+    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
 
   case X86::BI__builtin_ia32_vfmaddss3:
   case X86::BI__builtin_ia32_vfmaddsd3:
   case X86::BI__builtin_ia32_vfmaddss3_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:
-    return EmitScalarFMAExpr(*this, Ops, Ops[0]);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
   case X86::BI__builtin_ia32_vfmaddss:
   case X86::BI__builtin_ia32_vfmaddsd:
-    return EmitScalarFMAExpr(*this, Ops,
+    return EmitScalarFMAExpr(*this, E, Ops,
                              Constant::getNullValue(Ops[0]->getType()));
   case X86::BI__builtin_ia32_vfmaddss3_maskz:
   case X86::BI__builtin_ia32_vfmaddsd3_maskz:
-    return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
   case X86::BI__builtin_ia32_vfmaddss3_mask3:
   case X86::BI__builtin_ia32_vfmaddsd3_mask3:
-    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
   case X86::BI__builtin_ia32_vfmsubss3_mask3:
   case X86::BI__builtin_ia32_vfmsubsd3_mask3:
-    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
-                             /*NegAcc*/true);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
+                             /*NegAcc*/ true);
   case X86::BI__builtin_ia32_vfmaddps:
   case X86::BI__builtin_ia32_vfmaddpd:
   case X86::BI__builtin_ia32_vfmaddps256:
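
Threading E into the emitters matters because FP options are per-expression in the source: a pragma can make one call strict while the rest of the file uses the default environment. A hypothetical translation unit illustrating this (compiled with -mfma; the function names are ours, and exact strict-FP lowering depends on the clang version):

#include <immintrin.h>

// Default FP environment: the scalar FMA builtin may lower to a plain
// llvm.fma call.
__m128 fma_default(__m128 a, __m128 b, __m128 c) {
  return _mm_fmadd_ss(a, b, c);
}

#pragma STDC FENV_ACCESS ON
// Strict FP: the same builtin now goes through the constrained path, so
// the emitter needs the FP options attached to this particular CallExpr.
__m128 fma_strict(__m128 a, __m128 b, __m128 c) {
  return _mm_fmadd_ss(a, b, c);
}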
@@ -12422,7 +12427,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vfmaddpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubpd512_mask3:
-    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
+    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
   case X86::BI__builtin_ia32_vfmaddsubps512_mask:
   case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
@@ -12431,7 +12436,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
-    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
+    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
 
   case X86::BI__builtin_ia32_movdqa32store128_mask:
   case X86::BI__builtin_ia32_movdqa64store128_mask:
@@ -13577,6 +13582,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, {A});
@@ -13600,6 +13606,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, A);
@@ -13629,6 +13636,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       }
     }
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                                      Ops[0]->getType());
       return Builder.CreateConstrainedFPCall(F, Ops[0]);
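
The sqrt hunks all follow the same split: plain llvm.sqrt when unconstrained, llvm.experimental.constrained.sqrt (with the builder's default rounding/exception metadata, freshly set by the RAII) when strict. Condensed into one illustrative helper (emitSqrt is our name, not clang's):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Sketch of the branch used by the sqrt builtins above: pick the
// constrained intrinsic iff the builder is in FP-constrained mode.
llvm::Value *emitSqrt(llvm::IRBuilderBase &B, llvm::Module &M,
                      llvm::Value *X) {
  if (B.getIsFPConstrained()) {
    llvm::Function *F = llvm::Intrinsic::getDeclaration(
        &M, llvm::Intrinsic::experimental_constrained_sqrt, X->getType());
    return B.CreateConstrainedFPCall(F, {X});
  }
  llvm::Function *F =
      llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::sqrt, X->getType());
  return B.CreateCall(F, {X});
}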
@@ -14173,6 +14181,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     if (IsMaskFCmp) {
       // We ignore SAE if strict FP is disabled. We only keep precise
       // exception behavior under strict FP.
+      // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
+      // object will be required.
       unsigned NumElts =
           cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
       Value *Cmp;
@@ -14225,8 +14235,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vcvtph2ps256:
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
+  }
 
   // AVX512 bf16 intrinsics
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {