Skip to content

Commit

Permalink
SPU LLVM: rewrite comparison on non-xfloat path of CFLTU, CFLTS
Browse files Browse the repository at this point in the history
CFLTU on the non-xfloat path is now as accurate as on the xfloat path.
* Also optimize FCTIW like FCTIWZ (PPU)
  • Loading branch information
elad335 committed Dec 26, 2019
1 parent e54438d commit 945c435
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
6 changes: 3 additions & 3 deletions rpcs3/Emu/Cell/PPUInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4832,9 +4832,9 @@ bool ppu_interpreter::FCMPU(ppu_thread& ppu, ppu_opcode_t op)

// Interpreter handler for FCTIW: converts the double in fpr[frb] to a 32-bit
// integer and stores it into fpr[frd] as the bit pattern of a 64-bit integer.
// Returns true; throws if the opcode's rc bit is set.
//
// NOTE(review): this listing is a commit diff rendered without +/- markers.
// The first three statements of the body appear to be the pre-commit version
// and the following three its replacement (both declare `b` and `res`), so
// only one of the two groups would exist in the actual source file - confirm
// against the repository.
bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op)
{
// Scalar variant: clamp to INT32_MAX when b >= INT32_MAX, otherwise convert
// with _mm_cvtsd_si32.
const f64 b = ppu.fpr[op.frb];
const s32 res = b >= f64(INT32_MAX) ? INT32_MAX : _mm_cvtsd_si32(_mm_load_sd(&b));
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(res);
// SSE variant: convert unconditionally, then XOR with a compare mask. The mask
// is all-ones when b >= 0x80000000 (2^31), so every bit of the converted value
// is inverted in that case - presumably turning the 0x80000000 overflow result
// of the conversion into 0x7fffffff (verify against the intrinsic's documentation).
const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
const auto res = _mm_xor_si128(_mm_cvtpd_epi32(b), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(0x80000000))));
// The low 32-bit lane is extracted and widened to s64 before being bit-cast to f64.
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvtsi128_si32(res));
// The rc form is not handled here: it throws instead of updating CR
// (the ppu_cr_set call is left commented out).
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true;
}
Expand Down
6 changes: 4 additions & 2 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4004,8 +4004,10 @@ void PPUTranslator::FRSP(ppu_opcode_t op)
void PPUTranslator::FCTIW(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), f64(INT32_MAX))), m_ir->getInt32(INT32_MAX),
Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(31.))), GetType<s32>());

// fix result saturation (0x80000000 -> 0x7fffffff)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));

//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fi", b));
Expand Down
6 changes: 3 additions & 3 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7509,7 +7509,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

value_t<s32[4]> r;
r.value = m_ir->CreateFPToSI(a.value, get_type<s32[4]>());
set_vr(op.rt, r ^ sext<s32[4]>(fcmp_ord(a >= fsplat<f32[4]>(std::exp2(31.f)))));
set_vr(op.rt, r ^ sext<s32[4]>(bitcast<s32[4]>(a) > splat<s32[4]>(((31 + 127) << 23) - 1)));
}
}

Expand Down Expand Up @@ -7568,7 +7568,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}

r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
set_vr(op.rt, select(fcmp_uno(a >= fsplat<f64[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(0.)))));
set_vr(op.rt, select(fcmp_ord(a >= fsplat<f64[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(0.)))));
}
else
{
Expand All @@ -7583,7 +7583,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

value_t<s32[4]> r;
r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
set_vr(op.rt, select(fcmp_uno(a >= fsplat<f32[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
set_vr(op.rt, select(bitcast<s32[4]>(a) > splat<s32[4]>(((32 + 127) << 23) - 1), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
}
}

Expand Down

0 comments on commit 945c435

Please sign in to comment.