Skip to content

Commit 81b6987

Browse files
committed
[FPEnv][X86] Platform builtins edition: clang should get from the AST the metadata for constrained FP builtins
Currently clang is not correctly retrieving from the AST the metadata for constrained FP builtins. This patch fixes that for the X86 specific builtins. Differential Revision: https://reviews.llvm.org/D94614
1 parent be0bf04 commit 81b6987

File tree

6 files changed

+335
-28
lines changed

6 files changed

+335
-28
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11664,7 +11664,7 @@ static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
1166411664
return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
1166511665
}
1166611666

11667-
static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
11667+
static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
1166811668
ArrayRef<Value *> Ops, bool IsSigned) {
1166911669
unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
1167011670
llvm::Type *Ty = Ops[1]->getType();
@@ -11676,6 +11676,7 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
1167611676
Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
1167711677
Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
1167811678
} else {
11679+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
1167911680
Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
1168011681
: CGF.Builder.CreateUIToFP(Ops[0], Ty);
1168111682
}
@@ -11684,8 +11685,9 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
1168411685
}
1168511686

1168611687
// Lowers X86 FMA intrinsics to IR.
11687-
static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
11688-
unsigned BuiltinID, bool IsAddSub) {
11688+
static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
11689+
ArrayRef<Value *> Ops, unsigned BuiltinID,
11690+
bool IsAddSub) {
1168911691

1169011692
bool Subtract = false;
1169111693
Intrinsic::ID IID = Intrinsic::not_intrinsic;
@@ -11742,6 +11744,7 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
1174211744
llvm::Type *Ty = A->getType();
1174311745
Function *FMA;
1174411746
if (CGF.Builder.getIsFPConstrained()) {
11747+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
1174511748
FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
1174611749
Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
1174711750
} else {
@@ -11783,10 +11786,10 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
1178311786
return Res;
1178411787
}
1178511788

11786-
static Value *
11787-
EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
11788-
Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
11789-
bool NegAcc = false) {
11789+
static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
11790+
MutableArrayRef<Value *> Ops, Value *Upper,
11791+
bool ZeroMask = false, unsigned PTIdx = 0,
11792+
bool NegAcc = false) {
1179011793
unsigned Rnd = 4;
1179111794
if (Ops.size() > 4)
1179211795
Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
@@ -11805,6 +11808,7 @@ EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
1180511808
Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
1180611809
{Ops[0], Ops[1], Ops[2], Ops[4]});
1180711810
} else if (CGF.Builder.getIsFPConstrained()) {
11811+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
1180811812
Function *FMA = CGF.CGM.getIntrinsic(
1180911813
Intrinsic::experimental_constrained_fma, Ops[0]->getType());
1181011814
Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
@@ -12142,8 +12146,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1214212146
// TODO: The builtins could be removed if the SSE header files used vector
1214312147
// extension comparisons directly (vector ordered/unordered may need
1214412148
// additional support via __builtin_isnan()).
12145-
auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
12146-
bool IsSignaling) {
12149+
auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
12150+
bool IsSignaling) {
12151+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
1214712152
Value *Cmp;
1214812153
if (IsSignaling)
1214912154
Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
@@ -12385,31 +12390,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1238512390
case X86::BI__builtin_ia32_cvtdq2ps512_mask:
1238612391
case X86::BI__builtin_ia32_cvtqq2ps512_mask:
1238712392
case X86::BI__builtin_ia32_cvtqq2pd512_mask:
12388-
return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true);
12393+
return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
1238912394
case X86::BI__builtin_ia32_cvtudq2ps512_mask:
1239012395
case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
1239112396
case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
12392-
return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false);
12397+
return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
1239312398

1239412399
case X86::BI__builtin_ia32_vfmaddss3:
1239512400
case X86::BI__builtin_ia32_vfmaddsd3:
1239612401
case X86::BI__builtin_ia32_vfmaddss3_mask:
1239712402
case X86::BI__builtin_ia32_vfmaddsd3_mask:
12398-
return EmitScalarFMAExpr(*this, Ops, Ops[0]);
12403+
return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
1239912404
case X86::BI__builtin_ia32_vfmaddss:
1240012405
case X86::BI__builtin_ia32_vfmaddsd:
12401-
return EmitScalarFMAExpr(*this, Ops,
12406+
return EmitScalarFMAExpr(*this, E, Ops,
1240212407
Constant::getNullValue(Ops[0]->getType()));
1240312408
case X86::BI__builtin_ia32_vfmaddss3_maskz:
1240412409
case X86::BI__builtin_ia32_vfmaddsd3_maskz:
12405-
return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
12410+
return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
1240612411
case X86::BI__builtin_ia32_vfmaddss3_mask3:
1240712412
case X86::BI__builtin_ia32_vfmaddsd3_mask3:
12408-
return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
12413+
return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
1240912414
case X86::BI__builtin_ia32_vfmsubss3_mask3:
1241012415
case X86::BI__builtin_ia32_vfmsubsd3_mask3:
12411-
return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
12412-
/*NegAcc*/true);
12416+
return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
12417+
/*NegAcc*/ true);
1241312418
case X86::BI__builtin_ia32_vfmaddps:
1241412419
case X86::BI__builtin_ia32_vfmaddpd:
1241512420
case X86::BI__builtin_ia32_vfmaddps256:
@@ -12422,7 +12427,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1242212427
case X86::BI__builtin_ia32_vfmaddpd512_maskz:
1242312428
case X86::BI__builtin_ia32_vfmaddpd512_mask3:
1242412429
case X86::BI__builtin_ia32_vfmsubpd512_mask3:
12425-
return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
12430+
return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
1242612431
case X86::BI__builtin_ia32_vfmaddsubps512_mask:
1242712432
case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
1242812433
case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
@@ -12431,7 +12436,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1243112436
case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
1243212437
case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
1243312438
case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
12434-
return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
12439+
return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
1243512440

1243612441
case X86::BI__builtin_ia32_movdqa32store128_mask:
1243712442
case X86::BI__builtin_ia32_movdqa64store128_mask:
@@ -13577,6 +13582,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1357713582
Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
1357813583
Function *F;
1357913584
if (Builder.getIsFPConstrained()) {
13585+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
1358013586
F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
1358113587
A->getType());
1358213588
A = Builder.CreateConstrainedFPCall(F, {A});
@@ -13600,6 +13606,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1360013606
Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
1360113607
Function *F;
1360213608
if (Builder.getIsFPConstrained()) {
13609+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
1360313610
F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
1360413611
A->getType());
1360513612
A = Builder.CreateConstrainedFPCall(F, A);
@@ -13629,6 +13636,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1362913636
}
1363013637
}
1363113638
if (Builder.getIsFPConstrained()) {
13639+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
1363213640
Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
1363313641
Ops[0]->getType());
1363413642
return Builder.CreateConstrainedFPCall(F, Ops[0]);
@@ -14173,6 +14181,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1417314181
if (IsMaskFCmp) {
1417414182
// We ignore SAE if strict FP is disabled. We only keep precise
1417514183
// exception behavior under strict FP.
14184+
// NOTE: If strict FP does ever go through here a CGFPOptionsRAII
14185+
// object will be required.
1417614186
unsigned NumElts =
1417714187
cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1417814188
Value *Cmp;
@@ -14225,8 +14235,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1422514235
case X86::BI__builtin_ia32_vcvtph2ps256:
1422614236
case X86::BI__builtin_ia32_vcvtph2ps_mask:
1422714237
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
14228-
case X86::BI__builtin_ia32_vcvtph2ps512_mask:
14238+
case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
14239+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
1422914240
return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
14241+
}
1423014242

1423114243
// AVX512 bf16 intrinsics
1423214244
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {

clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
1+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s
2+
3+
// Test that the constrained intrinsics are picking up the exception
4+
// metadata from the AST instead of the global default from the command line.
5+
6+
#pragma float_control(except, on)
27

38

49
#include <immintrin.h>

0 commit comments

Comments
 (0)