Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make have_fma consistent between interpreter and compiled #52206

Merged
merged 2 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@
XX(jl_get_binding_wr) \
XX(jl_get_cpu_name) \
XX(jl_get_cpu_features) \
XX(jl_cpu_has_fma) \
XX(jl_get_current_task) \
XX(jl_get_default_sysimg_path) \
XX(jl_get_excstack) \
Expand Down
4 changes: 2 additions & 2 deletions src/llvm-cpufeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,15 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS
StringRef FS =
FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();

SmallVector<StringRef, 6> Features;
SmallVector<StringRef, 128> Features;
FS.split(Features, ',');
for (StringRef Feature : Features)
if (TT.isARM()) {
if (Feature == "+vfp4")
return typ == "f32" || typ == "f64";
else if (Feature == "+vfp4sp")
return typ == "f32";
} else {
} else if (TT.isX86()) {
if (Feature == "+fma" || Feature == "+fma4")
return typ == "f32" || typ == "f64";
}
Expand Down
2 changes: 2 additions & 0 deletions src/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
// Return the features of the host CPU as a julia string.
JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void);
// Check if the CPU has native FMA instructions for floats of width `bits`
// (32 or 64). Returns jl_true or jl_false.
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits);
// Dump the name and feature set of the host CPU
// For debugging only
JL_DLLEXPORT void jl_dump_host_cpu(void);
JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data);
Expand Down
16 changes: 16 additions & 0 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1808,6 +1808,22 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
}

// Report whether native fused multiply-add is available for floats of width `bits`.
//
// On AArch64 this returns jl_true unconditionally (`bits` is not inspected).
// On other ARM builds the answer comes from the feature bits of the first JIT target:
//   - vfp4sp answers true for bits == 32 only;
//   - vfp4   answers true for bits == 32 or bits == 64.
// Every other case returns jl_false.
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
{
#ifdef _CPU_AARCH64_
    return jl_true;
#else
    // front() on an empty container is undefined behaviour; report "no FMA"
    // if no JIT target has been set up yet.
    if (jit_targets.empty())
        return jl_false;
    // Bind by reference so the whole TargetData object is not copied on every call;
    // only the feature list itself is copied.
    const TargetData<feature_sz> &target = jit_targets.front();
    FeatureList<feature_sz> features = target.en.features;
    if (bits == 32 && test_nbit(features, Feature::vfp4sp))
        return jl_true;
    if ((bits == 64 || bits == 32) && test_nbit(features, Feature::vfp4))
        return jl_true;
    return jl_false;
#endif
}

jl_image_t jl_init_processor_sysimg(void *hdl)
{
if (!jit_targets.empty())
Expand Down
5 changes: 5 additions & 0 deletions src/processor_fallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
}

// Fallback implementation: always reports that FMA is unavailable, whatever
// width is asked for.
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
{
    (void)bits; // the requested width does not influence the answer here
    // Match behaviour of have_fma in src/llvm-cpufeatures.cpp (assume false)
    return jl_false;
}

JL_DLLEXPORT void jl_dump_host_cpu(void)
{
jl_safe_printf("CPU: %s\n", host_cpu_name().c_str());
Expand Down
11 changes: 11 additions & 0 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

// CPUID

#include "julia.h"
extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
{
asm volatile (
Expand Down Expand Up @@ -1062,6 +1063,16 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
}

// Report whether native fused multiply-add is available for floats of width `bits`.
//
// Only bits == 32 and bits == 64 can yield jl_true, and only when the first JIT
// target has the `fma` or `fma4` feature bit set. Everything else returns jl_false.
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
{
    if (bits != 32 && bits != 64)
        return jl_false;
    // front() on an empty container is undefined behaviour; report "no FMA"
    // if no JIT target has been set up yet.
    if (jit_targets.empty())
        return jl_false;
    // Bind by reference so the whole TargetData object is not copied on every call;
    // only the feature list itself is copied.
    const TargetData<feature_sz> &target = jit_targets.front();
    FeatureList<feature_sz> features = target.en.features;
    if (test_nbit(features, Feature::fma) || test_nbit(features, Feature::fma4))
        return jl_true;
    return jl_false;
}

jl_image_t jl_init_processor_sysimg(void *hdl)
{
if (!jit_targets.empty())
Expand Down
11 changes: 8 additions & 3 deletions src/runtime_intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1671,10 +1671,15 @@ un_fintrinsic(trunc_float,trunc_llvm)
un_fintrinsic(rint_float,rint_llvm)
un_fintrinsic(sqrt_float,sqrt_llvm)
un_fintrinsic(sqrt_float,sqrt_llvm_fast)
jl_value_t *jl_cpu_has_fma(int bits);

// Runtime (interpreter) implementation of the `have_fma` intrinsic.
//
// `typ` must be a datatype (enforced by JL_TYPECHK). Float32 and Float64 are
// forwarded to jl_cpu_has_fma with the matching bit width; every other type
// yields jl_false.
JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
{
    JL_TYPECHK(have_fma, datatype, typ); // TODO what about float16/bfloat16?
    // No unconditional `return jl_false;` here: an early return would make the
    // per-type dispatch below unreachable.
    if (typ == (jl_value_t*)jl_float32_type)
        return jl_cpu_has_fma(32);
    else if (typ == (jl_value_t*)jl_float64_type)
        return jl_cpu_has_fma(64);
    else
        return jl_false;
}
2 changes: 2 additions & 0 deletions test/llvmpasses/cpu-features.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s

; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"

declare i1 @julia.cpu.have_fma.f64()
declare double @with_fma(double %0, double %1, double %2)
Expand Down
2 changes: 2 additions & 0 deletions test/sysinfo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Base.Sys.loadavg()

@test length(ccall(:jl_get_cpu_name, String, ())) != 0
@test length(ccall(:jl_get_cpu_features, String, ())) >= 0
# Check that the compiled `have_fma` intrinsic agrees with the runtime feature query.
# Float64 is queried because `have_fma` only consults the CPU for Float32/Float64
# (any other type is always `false`, which would make the comparison vacuous).
# `jl_cpu_has_fma` returns a boxed Julia value (`jl_value_t*`), so the ccall
# return type must be `Any`, not `Bool`.
foo_fma() = Core.Intrinsics.have_fma(Float64)
@test ccall(:jl_cpu_has_fma, Any, (Cint,), 64) == foo_fma()

if Sys.isunix()
mktempdir() do tempdir
Expand Down