Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SPU LLVM: rewrite comparison on non-xfloat path of CFLTU, CFLTS #6872

Merged
merged 1 commit into from
Dec 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions rpcs3/Emu/Cell/PPUInterpreter.cpp
Expand Up @@ -4832,9 +4832,9 @@ bool ppu_interpreter::FCMPU(ppu_thread& ppu, ppu_opcode_t op)

bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op)
{
const f64 b = ppu.fpr[op.frb];
const s32 res = b >= f64(INT32_MAX) ? INT32_MAX : _mm_cvtsd_si32(_mm_load_sd(&b));
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(res);
const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
const auto res = _mm_xor_si128(_mm_cvtpd_epi32(b), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(0x80000000))));
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvtsi128_si32(res));
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true;
}
Expand Down
6 changes: 4 additions & 2 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
Expand Up @@ -4004,8 +4004,10 @@ void PPUTranslator::FRSP(ppu_opcode_t op)
void PPUTranslator::FCTIW(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), f64(INT32_MAX))), m_ir->getInt32(INT32_MAX),
Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(31.))), GetType<s32>());

// fix result saturation (0x80000000 -> 0x7fffffff)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));

//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fi", b));
Expand Down
6 changes: 3 additions & 3 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Expand Up @@ -7509,7 +7509,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

value_t<s32[4]> r;
r.value = m_ir->CreateFPToSI(a.value, get_type<s32[4]>());
set_vr(op.rt, r ^ sext<s32[4]>(fcmp_ord(a >= fsplat<f32[4]>(std::exp2(31.f)))));
set_vr(op.rt, r ^ sext<s32[4]>(bitcast<s32[4]>(a) > splat<s32[4]>(((31 + 127) << 23) - 1)));
}
}

Expand Down Expand Up @@ -7568,7 +7568,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}

r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
set_vr(op.rt, select(fcmp_uno(a >= fsplat<f64[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(0.)))));
set_vr(op.rt, select(fcmp_ord(a >= fsplat<f64[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(0.)))));
}
else
{
Expand All @@ -7583,7 +7583,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

value_t<s32[4]> r;
r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
set_vr(op.rt, select(fcmp_uno(a >= fsplat<f32[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
set_vr(op.rt, select(bitcast<s32[4]>(a) > splat<s32[4]>(((32 + 127) << 23) - 1), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
}
}

Expand Down