-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[clang][LoongArch] Add support for the _Float16 type #141703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: None (Ami-zhang) Changes: Enable _Float16 for the LoongArch target. Additionally, this change fixes incorrect ABI lowering of _Float16 in the case of structs containing fp16 that are eligible for passing via GPR+FPR or FPR+FPR. Finally, it also fixes int16 -> __fp16 conversion code gen so that it uses generic LLVM IR rather than the llvm.convert.to.fp16 intrinsics. Full diff: https://github.com/llvm/llvm-project/pull/141703.diff 5 Files Affected:
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index a40dd4d1a1673..088f01a0199e4 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1001,6 +1001,7 @@ to ``float``; see below for more information on this emulation.
* X86 (if SSE2 is available; natively if AVX512-FP16 is also available)
* RISC-V (natively if Zfh or Zhinx is available)
* SystemZ (emulated)
+ * LoongArch
* ``__bf16`` is supported on the following targets (currently never natively):
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index 4c7b53abfef9b..8a8c978ab89db 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -53,6 +53,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::IEEEquad();
MCountName = "_mcount";
+ HasFloat16 = true;
SuitableAlign = 128;
WCharType = SignedInt;
WIntType = UnsignedInt;
@@ -98,6 +99,8 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
bool hasBitIntType() const override { return true; }
+ bool useFP16ConversionIntrinsics() const override { return false; }
+
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override;
diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp
index 0f689371a60db..7640f3779816a 100644
--- a/clang/lib/CodeGen/Targets/LoongArch.cpp
+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp
@@ -110,10 +110,9 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper(
uint64_t Size = getContext().getTypeSize(Ty);
if (IsInt && Size > GRLen)
return false;
- // Can't be eligible if larger than the FP registers. Half precision isn't
- // currently supported on LoongArch and the ABI hasn't been confirmed, so
- // default to the integer ABI in that case.
- if (IsFloat && (Size > FRLen || Size < 32))
+ // Can't be eligible if larger than the FP registers. Handling of half
+ // precision values has been specified in the ABI, so don't block those.
+ if (IsFloat && Size > FRLen)
return false;
// Can't be eligible if an integer type was already found (int+int pairs
// are not eligible).
diff --git a/clang/test/CodeGen/LoongArch/__fp16-convert.c b/clang/test/CodeGen/LoongArch/__fp16-convert.c
new file mode 100644
index 0000000000000..84ef5de960b47
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/__fp16-convert.c
@@ -0,0 +1,30 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple loongarch64 -emit-llvm %s -o - \
+// RUN: | FileCheck %s
+
+__fp16 y;
+short z;
+// CHECK-LABEL: define dso_local void @bar1(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @y, align 2
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[CONV]] to i16
+// CHECK-NEXT: store i16 [[CONV1]], ptr @z, align 2
+// CHECK-NEXT: ret void
+//
+void bar1(){
+ z = y;
+}
+// CHECK-LABEL: define dso_local void @bar2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @z, align 2
+// CHECK-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to float
+// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[CONV]] to half
+// CHECK-NEXT: store half [[CONV1]], ptr @y, align 2
+// CHECK-NEXT: ret void
+//
+void bar2(){
+ y = z;
+}
diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c
index fc7f1eada586b..9f64cfd662e5f 100644
--- a/clang/test/CodeGen/LoongArch/abi-lp64d.c
+++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c
@@ -48,6 +48,9 @@ unsigned long check_ulong() { return 0; }
// CHECK-LABEL: define{{.*}} i64 @check_ulonglong()
unsigned long long check_ulonglong() { return 0; }
+// CHECK-LABEL: define{{.*}} half @check_float16()
+_Float16 check_float16() { return 0; }
+
// CHECK-LABEL: define{{.*}} float @check_float()
float check_float() { return 0; }
@@ -127,6 +130,14 @@ struct i16x4_s f_i16x4_s(struct i16x4_s x) {
/// available, the value is passed in a GAR; if no GAR is available, the value
/// is passed on the stack.
+struct f16x1_s {
+ __fp16 a;
+};
+
+struct float16x1_s {
+ _Float16 a;
+};
+
struct f32x1_s {
float a;
};
@@ -135,6 +146,16 @@ struct f64x1_s {
double a;
};
+// CHECK-LABEL: define{{.*}} half @f_f16x1_s(half %0)
+struct f16x1_s f_f16x1_s(struct f16x1_s x) {
+ return x;
+}
+
+// CHECK-LABEL: define{{.*}} half @f_float16x1_s(half %0)
+struct float16x1_s f_float16x1_s(struct float16x1_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0)
struct f32x1_s f_f32x1_s(struct f32x1_s x) {
return x;
@@ -151,10 +172,20 @@ struct f64x1_s f_f64x1_s(struct f64x1_s x) {
/// number of available FAR is less than 2, it’s passed in a GAR, and passed on
/// the stack if no GAR is available.
+struct f16x2_s {
+ __fp16 a;
+ _Float16 b;
+};
+
struct f32x2_s {
float a, b;
};
+// CHECK-LABEL: define{{.*}} { half, half } @f_f16x2_s(half %0, half %1)
+struct f16x2_s f_f16x2_s(struct f16x2_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1)
struct f32x2_s f_f32x2_s(struct f32x2_s x) {
return x;
@@ -165,11 +196,21 @@ struct f32x2_s f_f32x2_s(struct f32x2_s x) {
/// i. Multiple fixed-point members. If there are available GAR, the structure
/// is passed in a GAR, and passed on the stack if no GAR is available.
+struct f16x1_i16x2_s {
+ _Float16 a;
+ int16_t b, c;
+};
+
struct f32x1_i16x2_s {
float a;
int16_t b, c;
};
+// CHECK-LABEL: define{{.*}} i64 @f_f16x1_i16x2_s(i64 %x.coerce)
+struct f16x1_i16x2_s f_f16x1_i16x2_s(struct f16x1_i16x2_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce)
struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) {
return x;
@@ -181,11 +222,21 @@ struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) {
/// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s
/// passed on the stack.
+struct f16x1_i32x1_s {
+ _Float16 a;
+ int32_t b;
+};
+
struct f32x1_i32x1_s {
float a;
int32_t b;
};
+// CHECK-LABEL: define{{.*}} { half, i32 } @f_f16x1_i32x1_s(half %0, i32 %1)
+struct f16x1_i32x1_s f_f16x1_i32x1_s(struct f16x1_i32x1_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1)
struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) {
return x;
@@ -253,6 +304,16 @@ struct f32x4_s f_f32x4_s(struct f32x4_s x) {
return x;
}
+struct f16x5_s {
+ _Float16 a, b, c, d;
+ __fp16 e;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x5_s([2 x i64] %x.coerce)
+struct f16x5_s f_f16x5_s(struct f16x5_s x) {
+ return x;
+}
+
/// ii. The structure with two double members is passed in a pair of available
/// FARs. If no a pair of available FARs, it’s passed in GARs. A structure with
/// one double member and one float member is same.
@@ -312,6 +373,16 @@ struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) {
return x;
}
+struct f16x4_i32x2_s {
+ _Float16 a, b, c, d;
+ int32_t e, f;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x4_i32x2_s([2 x i64] %x.coerce)
+struct f16x4_i32x2_s f_f16x4_i32x2_s(struct f16x4_i32x2_s x) {
+ return x;
+}
+
/// 3. WOA > 2 × GRLEN
/// a. It’s passed by reference and are replaced in the argument list with the
/// address. If there is an available GAR, the reference is passed in the GAR,
|
@llvm/pr-subscribers-backend-loongarch Author: None (Ami-zhang) Changes: Enable _Float16 for the LoongArch target. Additionally, this change fixes incorrect ABI lowering of _Float16 in the case of structs containing fp16 that are eligible for passing via GPR+FPR or FPR+FPR. Finally, it also fixes int16 -> __fp16 conversion code gen so that it uses generic LLVM IR rather than the llvm.convert.to.fp16 intrinsics. Full diff: https://github.com/llvm/llvm-project/pull/141703.diff 5 Files Affected:
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index a40dd4d1a1673..088f01a0199e4 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1001,6 +1001,7 @@ to ``float``; see below for more information on this emulation.
* X86 (if SSE2 is available; natively if AVX512-FP16 is also available)
* RISC-V (natively if Zfh or Zhinx is available)
* SystemZ (emulated)
+ * LoongArch
* ``__bf16`` is supported on the following targets (currently never natively):
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index 4c7b53abfef9b..8a8c978ab89db 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -53,6 +53,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::IEEEquad();
MCountName = "_mcount";
+ HasFloat16 = true;
SuitableAlign = 128;
WCharType = SignedInt;
WIntType = UnsignedInt;
@@ -98,6 +99,8 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
bool hasBitIntType() const override { return true; }
+ bool useFP16ConversionIntrinsics() const override { return false; }
+
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override;
diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp
index 0f689371a60db..7640f3779816a 100644
--- a/clang/lib/CodeGen/Targets/LoongArch.cpp
+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp
@@ -110,10 +110,9 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper(
uint64_t Size = getContext().getTypeSize(Ty);
if (IsInt && Size > GRLen)
return false;
- // Can't be eligible if larger than the FP registers. Half precision isn't
- // currently supported on LoongArch and the ABI hasn't been confirmed, so
- // default to the integer ABI in that case.
- if (IsFloat && (Size > FRLen || Size < 32))
+ // Can't be eligible if larger than the FP registers. Handling of half
+ // precision values has been specified in the ABI, so don't block those.
+ if (IsFloat && Size > FRLen)
return false;
// Can't be eligible if an integer type was already found (int+int pairs
// are not eligible).
diff --git a/clang/test/CodeGen/LoongArch/__fp16-convert.c b/clang/test/CodeGen/LoongArch/__fp16-convert.c
new file mode 100644
index 0000000000000..84ef5de960b47
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/__fp16-convert.c
@@ -0,0 +1,30 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple loongarch64 -emit-llvm %s -o - \
+// RUN: | FileCheck %s
+
+__fp16 y;
+short z;
+// CHECK-LABEL: define dso_local void @bar1(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @y, align 2
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[CONV]] to i16
+// CHECK-NEXT: store i16 [[CONV1]], ptr @z, align 2
+// CHECK-NEXT: ret void
+//
+void bar1(){
+ z = y;
+}
+// CHECK-LABEL: define dso_local void @bar2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @z, align 2
+// CHECK-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to float
+// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[CONV]] to half
+// CHECK-NEXT: store half [[CONV1]], ptr @y, align 2
+// CHECK-NEXT: ret void
+//
+void bar2(){
+ y = z;
+}
diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c
index fc7f1eada586b..9f64cfd662e5f 100644
--- a/clang/test/CodeGen/LoongArch/abi-lp64d.c
+++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c
@@ -48,6 +48,9 @@ unsigned long check_ulong() { return 0; }
// CHECK-LABEL: define{{.*}} i64 @check_ulonglong()
unsigned long long check_ulonglong() { return 0; }
+// CHECK-LABEL: define{{.*}} half @check_float16()
+_Float16 check_float16() { return 0; }
+
// CHECK-LABEL: define{{.*}} float @check_float()
float check_float() { return 0; }
@@ -127,6 +130,14 @@ struct i16x4_s f_i16x4_s(struct i16x4_s x) {
/// available, the value is passed in a GAR; if no GAR is available, the value
/// is passed on the stack.
+struct f16x1_s {
+ __fp16 a;
+};
+
+struct float16x1_s {
+ _Float16 a;
+};
+
struct f32x1_s {
float a;
};
@@ -135,6 +146,16 @@ struct f64x1_s {
double a;
};
+// CHECK-LABEL: define{{.*}} half @f_f16x1_s(half %0)
+struct f16x1_s f_f16x1_s(struct f16x1_s x) {
+ return x;
+}
+
+// CHECK-LABEL: define{{.*}} half @f_float16x1_s(half %0)
+struct float16x1_s f_float16x1_s(struct float16x1_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0)
struct f32x1_s f_f32x1_s(struct f32x1_s x) {
return x;
@@ -151,10 +172,20 @@ struct f64x1_s f_f64x1_s(struct f64x1_s x) {
/// number of available FAR is less than 2, it’s passed in a GAR, and passed on
/// the stack if no GAR is available.
+struct f16x2_s {
+ __fp16 a;
+ _Float16 b;
+};
+
struct f32x2_s {
float a, b;
};
+// CHECK-LABEL: define{{.*}} { half, half } @f_f16x2_s(half %0, half %1)
+struct f16x2_s f_f16x2_s(struct f16x2_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1)
struct f32x2_s f_f32x2_s(struct f32x2_s x) {
return x;
@@ -165,11 +196,21 @@ struct f32x2_s f_f32x2_s(struct f32x2_s x) {
/// i. Multiple fixed-point members. If there are available GAR, the structure
/// is passed in a GAR, and passed on the stack if no GAR is available.
+struct f16x1_i16x2_s {
+ _Float16 a;
+ int16_t b, c;
+};
+
struct f32x1_i16x2_s {
float a;
int16_t b, c;
};
+// CHECK-LABEL: define{{.*}} i64 @f_f16x1_i16x2_s(i64 %x.coerce)
+struct f16x1_i16x2_s f_f16x1_i16x2_s(struct f16x1_i16x2_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce)
struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) {
return x;
@@ -181,11 +222,21 @@ struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) {
/// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s
/// passed on the stack.
+struct f16x1_i32x1_s {
+ _Float16 a;
+ int32_t b;
+};
+
struct f32x1_i32x1_s {
float a;
int32_t b;
};
+// CHECK-LABEL: define{{.*}} { half, i32 } @f_f16x1_i32x1_s(half %0, i32 %1)
+struct f16x1_i32x1_s f_f16x1_i32x1_s(struct f16x1_i32x1_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1)
struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) {
return x;
@@ -253,6 +304,16 @@ struct f32x4_s f_f32x4_s(struct f32x4_s x) {
return x;
}
+struct f16x5_s {
+ _Float16 a, b, c, d;
+ __fp16 e;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x5_s([2 x i64] %x.coerce)
+struct f16x5_s f_f16x5_s(struct f16x5_s x) {
+ return x;
+}
+
/// ii. The structure with two double members is passed in a pair of available
/// FARs. If no a pair of available FARs, it’s passed in GARs. A structure with
/// one double member and one float member is same.
@@ -312,6 +373,16 @@ struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) {
return x;
}
+struct f16x4_i32x2_s {
+ _Float16 a, b, c, d;
+ int32_t e, f;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x4_i32x2_s([2 x i64] %x.coerce)
+struct f16x4_i32x2_s f_f16x4_i32x2_s(struct f16x4_i32x2_s x) {
+ return x;
+}
+
/// 3. WOA > 2 × GRLEN
/// a. It’s passed by reference and are replaced in the argument list with the
/// address. If there is an available GAR, the reference is passed in the GAR,
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Enable _Float16 for the LoongArch target. Additionally, this change fixes incorrect ABI lowering of _Float16 in the case of structs containing fp16 that are eligible for passing via GPR+FPR or FPR+FPR. Finally, it also fixes int16 -> __fp16 conversion code gen so that it uses generic LLVM IR rather than the llvm.convert.to.fp16 intrinsics.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
Enable _Float16 for the LoongArch target. Additionally, this change fixes incorrect ABI lowering of _Float16 in the case of structs containing fp16 that are eligible for passing via GPR+FPR or FPR+FPR. Finally, it also fixes int16 -> __fp16 conversion code gen so that it uses generic LLVM IR rather than the llvm.convert.to.fp16 intrinsics.
Enable _Float16 for the LoongArch target. Additionally, this change fixes incorrect ABI lowering of _Float16 in the case of structs containing fp16 that are eligible for passing via GPR+FPR or FPR+FPR. Finally, it also fixes int16 -> __fp16 conversion code gen so that it uses generic LLVM IR rather than the llvm.convert.to.fp16 intrinsics.
Enable _Float16 for the LoongArch target. Additionally, this change fixes incorrect ABI lowering of _Float16 in the case of structs containing fp16 that are eligible for passing via GPR+FPR or FPR+FPR. Finally, it also fixes int16 -> __fp16 conversion code gen so that it uses generic LLVM IR rather than the llvm.convert.to.fp16 intrinsics.
Enable _Float16 for the LoongArch target. Additionally, this change fixes incorrect ABI lowering of _Float16 in the case of structs containing fp16 that are eligible for passing via GPR+FPR or FPR+FPR. Finally, it also fixes int16 -> __fp16 conversion code gen so that it uses generic LLVM IR rather than the llvm.convert.to.fp16 intrinsics.