Skip to content

Commit

Permalink
SPU LLVM: use FMA with approx xfloat when available
Browse files Browse the repository at this point in the history
Emulate FMA with double precision if unsupported natively.
  • Loading branch information
Nekotekina committed Dec 21, 2019
1 parent 068450d commit 3b46c9c
Showing 1 changed file with 23 additions and 6 deletions.
29 changes: 23 additions & 6 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Expand Up @@ -7362,13 +7362,30 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f32[4]>(op.ra)) == fabs(get_vr<f32[4]>(op.rb)))));
}

// Multiply and return zero if any of the arguments is in the xfloat range.
// NOTE(review): the mzero_if_xtended signature directly below looks like the removed (pre-commit) side of this diff hunk; fma32x4 is its replacement — confirm against the real file.
value_t<f32[4]> mzero_if_xtended(value_t<f32[4]> a, value_t<f32[4]> b)
// Builds a * b + c for four f32 lanes.
// Lanes where either a or b falls in the xfloat range produce c unchanged, i.e. the product is treated as zero there.
// With m_use_fma set, the f32 fma intrinsic is called directly; otherwise the operation is carried out on f64 lanes and truncated back to f32.
value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
{
// Compare absolute values with max positive float in normal range.
// The absolute values are reinterpreted as signed 32-bit integers so the range test is a plain integer compare.
const auto aa = bitcast<s32[4]>(fabs(a));
const auto ab = bitcast<s32[4]>(fabs(b));
// NOTE(review): the return on the next line looks like the removed body of mzero_if_xtended shown by the diff view; if it were live code, everything after it would be unreachable — confirm.
return eval(select(max(aa, ab) > 0x7f7fffff, fsplat<f32[4]>(0.), a * b));
// Per-lane mask: true where the larger of the two bit patterns exceeds 0x7f7fffff.
const auto sc = eval(max(aa, ab) > 0x7f7fffff);

if (m_use_fma)
{
// Call the fma intrinsic on the f32 operands as they are.
value_t<f32[4]> r;
r.value = m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), {a.value, b.value, c.value});
// Masked lanes take c instead of the fma result.
r.value = m_ir->CreateSelect(sc.value, c.value, r.value);
return r;
}

// Convert to doubles
// Fallback when m_use_fma is not set: extend all three operands to f64 and apply fmuladd at that width.
const auto xa = m_ir->CreateFPExt(a.value, get_type<f64[4]>());
const auto xb = m_ir->CreateFPExt(b.value, get_type<f64[4]>());
const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
const auto xr = m_ir->CreateCall(get_intrinsic<f64[4]>(llvm::Intrinsic::fmuladd), {xa, xb, xc});
value_t<f32[4]> r;
// Truncate the f64 result back to f32, then apply the same mask as the native path.
r.value = m_ir->CreateFPTrunc(xr, get_type<f32[4]>());
r.value = m_ir->CreateSelect(sc.value, c.value, r.value);
return r;
}

void FNMS(spu_opcode_t op)
Expand All @@ -7377,7 +7394,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (g_cfg.core.spu_accurate_xfloat)
set_vr(op.rt4, -fmuladd(get_vr<f64[4]>(op.ra), get_vr<f64[4]>(op.rb), eval(-get_vr<f64[4]>(op.rc))));
else if (g_cfg.core.spu_approx_xfloat)
set_vr(op.rt4, get_vr<f32[4]>(op.rc) - mzero_if_xtended(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
set_vr(op.rt4, -fma32x4(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb), eval(-get_vr<f32[4]>(op.rc))));
else
set_vr(op.rt4, get_vr<f32[4]>(op.rc) - get_vr<f32[4]>(op.ra) * get_vr<f32[4]>(op.rb));
}
Expand All @@ -7388,7 +7405,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (g_cfg.core.spu_accurate_xfloat)
set_vr(op.rt4, fmuladd(get_vr<f64[4]>(op.ra), get_vr<f64[4]>(op.rb), get_vr<f64[4]>(op.rc)));
else if (g_cfg.core.spu_approx_xfloat)
set_vr(op.rt4, mzero_if_xtended(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)) + get_vr<f32[4]>(op.rc));
set_vr(op.rt4, fma32x4(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb), get_vr<f32[4]>(op.rc)));
else
set_vr(op.rt4, get_vr<f32[4]>(op.ra) * get_vr<f32[4]>(op.rb) + get_vr<f32[4]>(op.rc));
}
Expand All @@ -7399,7 +7416,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (g_cfg.core.spu_accurate_xfloat)
set_vr(op.rt4, fmuladd(get_vr<f64[4]>(op.ra), get_vr<f64[4]>(op.rb), eval(-get_vr<f64[4]>(op.rc))));
else if (g_cfg.core.spu_approx_xfloat)
set_vr(op.rt4, mzero_if_xtended(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)) - get_vr<f32[4]>(op.rc));
set_vr(op.rt4, fma32x4(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb), eval(-get_vr<f32[4]>(op.rc))));
else
set_vr(op.rt4, get_vr<f32[4]>(op.ra) * get_vr<f32[4]>(op.rb) - get_vr<f32[4]>(op.rc));
}
Expand Down

0 comments on commit 3b46c9c

Please sign in to comment.