Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SPU LLVM: Try to reduce float clamping by using round-to-zero #12559

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 35 additions & 6 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8425,6 +8425,24 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
{
return true;
}
}

// Tells whether the expression `v` matches one of the fm / fma / fms / fnms
// expression patterns, with any f32[4] operands in every slot.
//
// Returns true on the first pattern that matches (patterns are tried in the
// order fm, fma, fms, fnms), and false when none of them matches.
//
// Candidates that are deliberately left disabled for now:
//   fa(any_vec, any_vec), fs(any_vec, any_vec), spu_re(any_vec), spu_rsqrte(any_vec)
bool is_input_float_result(value_t<f32[4]> v)
{
	// Shared operand placeholder for every pattern below
	// (presumably matches an arbitrary f32[4] sub-expression - confirm against match<>).
	static const auto any_vec = match<f32[4]>();

	// Element 0 of match_expr's result is the "matched" flag; `||` keeps the
	// original left-to-right, short-circuiting evaluation order.
	return std::get<0>(match_expr(v, fm(any_vec, any_vec)))
		|| std::get<0>(match_expr(v, fma(any_vec, any_vec, any_vec)))
		|| std::get<0>(match_expr(v, fms(any_vec, any_vec, any_vec)))
		|| std::get<0>(match_expr(v, fnms(any_vec, any_vec, any_vec)));
}
Expand All @@ -8447,6 +8465,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
{
if (g_cfg.core.spu_approx_xfloat)
{
if (is_input_float_result(v))
{
return v;
}
}

if (m_use_avx512)
{
if (is_input_positive(v))
Expand Down Expand Up @@ -8775,7 +8801,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
return eval(bitcast<f32[4]>(bitcast<s32[4]>(a * b) & ma & mb));
const auto mul = eval(bitcast<s32[4]>(a * b));
const auto after_a = is_input_float_result(a) ? mul : eval(ma & mul);
const auto after_b = is_input_float_result(b) ? after_a : eval(mb & after_a);
return eval(bitcast<f32[4]>(after_b));
}
else
{
Expand Down Expand Up @@ -9093,7 +9122,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
{
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), clamp_smax(c));
}
else
{
Expand Down Expand Up @@ -9130,9 +9159,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
{
const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
const auto ca = bitcast<f32[4]>(bitcast<s32[4]>(a) & mb);
const auto cb = bitcast<f32[4]>(bitcast<s32[4]>(b) & ma);
return fma32x4(eval(ca), eval(cb), c);
const auto ca = is_input_float_result(b) ? a : eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
const auto cb = is_input_float_result(a) ? b : eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
return fma32x4(eval(ca), eval(cb), clamp_smax(c));
}
else
{
Expand Down Expand Up @@ -9202,7 +9231,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

if (g_cfg.core.spu_approx_xfloat)
{
return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c));
return fma32x4(clamp_smax(a), clamp_smax(b), eval(-clamp_smax(c)));
}
else
{
Expand Down