@@ -11664,7 +11664,7 @@ static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
 }
 
-static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
+static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
                                      ArrayRef<Value *> Ops, bool IsSigned) {
   unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
   llvm::Type *Ty = Ops[1]->getType();
@@ -11676,6 +11676,7 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
     Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
     Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
   } else {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
                    : CGF.Builder.CreateUIToFP(Ops[0], Ty);
   }
@@ -11684,8 +11685,9 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
 }
 
 // Lowers X86 FMA intrinsics to IR.
-static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
-                             unsigned BuiltinID, bool IsAddSub) {
+static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
+                             ArrayRef<Value *> Ops, unsigned BuiltinID,
+                             bool IsAddSub) {
 
   bool Subtract = false;
   Intrinsic::ID IID = Intrinsic::not_intrinsic;
@@ -11742,6 +11744,7 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
   llvm::Type *Ty = A->getType();
   Function *FMA;
   if (CGF.Builder.getIsFPConstrained()) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
     Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
   } else {
@@ -11783,10 +11786,10 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
   return Res;
 }
 
-static Value *
-EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
-                  Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
-                  bool NegAcc = false) {
+static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
+                                MutableArrayRef<Value *> Ops, Value *Upper,
+                                bool ZeroMask = false, unsigned PTIdx = 0,
+                                bool NegAcc = false) {
   unsigned Rnd = 4;
   if (Ops.size() > 4)
     Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
@@ -11805,6 +11808,7 @@ EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
     Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                  {Ops[0], Ops[1], Ops[2], Ops[4]});
   } else if (CGF.Builder.getIsFPConstrained()) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *FMA = CGF.CGM.getIntrinsic(
         Intrinsic::experimental_constrained_fma, Ops[0]->getType());
     Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
@@ -12142,8 +12146,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   // TODO: The builtins could be removed if the SSE header files used vector
   // extension comparisons directly (vector ordered/unordered may need
   // additional support via __builtin_isnan()).
-  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
-                                      bool IsSignaling) {
+  auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
+                                         bool IsSignaling) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     Value *Cmp;
     if (IsSignaling)
       Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
@@ -12385,31 +12390,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
-    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true);
+    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
-    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false);
+    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
 
   case X86::BI__builtin_ia32_vfmaddss3:
   case X86::BI__builtin_ia32_vfmaddsd3:
   case X86::BI__builtin_ia32_vfmaddss3_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:
-    return EmitScalarFMAExpr(*this, Ops, Ops[0]);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
   case X86::BI__builtin_ia32_vfmaddss:
   case X86::BI__builtin_ia32_vfmaddsd:
-    return EmitScalarFMAExpr(*this, Ops,
+    return EmitScalarFMAExpr(*this, E, Ops,
                              Constant::getNullValue(Ops[0]->getType()));
   case X86::BI__builtin_ia32_vfmaddss3_maskz:
   case X86::BI__builtin_ia32_vfmaddsd3_maskz:
-    return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
   case X86::BI__builtin_ia32_vfmaddss3_mask3:
   case X86::BI__builtin_ia32_vfmaddsd3_mask3:
-    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
   case X86::BI__builtin_ia32_vfmsubss3_mask3:
   case X86::BI__builtin_ia32_vfmsubsd3_mask3:
-    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
-                             /*NegAcc*/true);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
+                             /*NegAcc*/ true);
   case X86::BI__builtin_ia32_vfmaddps:
   case X86::BI__builtin_ia32_vfmaddpd:
   case X86::BI__builtin_ia32_vfmaddps256:
@@ -12422,7 +12427,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vfmaddpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubpd512_mask3:
-    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
+    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
   case X86::BI__builtin_ia32_vfmaddsubps512_mask:
   case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
@@ -12431,7 +12436,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
-    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
+    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
 
   case X86::BI__builtin_ia32_movdqa32store128_mask:
   case X86::BI__builtin_ia32_movdqa64store128_mask:
@@ -13577,6 +13582,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, {A});
@@ -13600,6 +13606,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, A);
@@ -13629,6 +13636,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       }
     }
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                                      Ops[0]->getType());
       return Builder.CreateConstrainedFPCall(F, Ops[0]);
@@ -14173,6 +14181,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     if (IsMaskFCmp) {
       // We ignore SAE if strict FP is disabled. We only keep precise
       // exception behavior under strict FP.
+      // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
+      // object will be required.
       unsigned NumElts =
           cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
       Value *Cmp;
@@ -14225,8 +14235,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vcvtph2ps256:
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
+  }
 
   // AVX512 bf16 intrinsics
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
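
The pattern this patch repeats is to open a CodeGenFunction::CGFPOptionsRAII scope, constructed from the CallExpr, before emitting strict or rounding-sensitive FP IR, so the builder's floating-point settings for that call site are installed on entry and restored when the scope ends. The standalone C++ sketch below illustrates only the general save/install/restore RAII idea; FPState and FPOptionsScope are invented names for illustration, not clang's API, which derives the settings from the expression itself.

// Standalone illustration of the save/install/restore RAII pattern.
// FPState and FPOptionsScope are hypothetical stand-ins, not clang classes.
#include <cassert>
#include <iostream>
#include <string>
#include <utility>

struct FPState {
  std::string RoundingMode = "dynamic";
  std::string ExceptionBehavior = "strict";
};

class FPOptionsScope {
  FPState &State;
  FPState Saved;

public:
  // Remember the current settings and install the ones for this call site.
  FPOptionsScope(FPState &S, std::string Rounding, std::string Except)
      : State(S), Saved(S) {
    State.RoundingMode = std::move(Rounding);
    State.ExceptionBehavior = std::move(Except);
  }
  // Restore the saved settings on scope exit, including early returns.
  ~FPOptionsScope() { State = Saved; }
};

int main() {
  FPState Builder;
  {
    FPOptionsScope Scope(Builder, "tonearest", "maytrap");
    std::cout << "in scope: " << Builder.RoundingMode << ", "
              << Builder.ExceptionBehavior << "\n";
  }
  assert(Builder.RoundingMode == "dynamic" &&
         Builder.ExceptionBehavior == "strict");
  std::cout << "restored: " << Builder.RoundingMode << ", "
            << Builder.ExceptionBehavior << "\n";
  return 0;
}

Because restoration happens in the destructor, the early returns inside the scoped regions above (for example in EmitScalarFMAExpr and the sqrt paths) still leave the builder's FP settings exactly as they were before the builtin was emitted.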