Skip to content

Commit

Permalink
Fix SPU Interpreter regression after RPCS3#6147
Browse files Browse the repository at this point in the history
  • Loading branch information
Nekotekina committed Jul 15, 2019
1 parent 1e2f81d commit cb5c26f
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 37 deletions.
53 changes: 49 additions & 4 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Expand Up @@ -24,6 +24,9 @@ const spu_decoder<spu_itype> s_spu_itype;
const spu_decoder<spu_iname> s_spu_iname;
const spu_decoder<spu_iflag> s_spu_iflag;

extern const spu_decoder<spu_interpreter_precise> g_spu_interpreter_precise;
extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast;

extern u64 get_timebased_time();

// Move 4 args for calling native function from a GHC calling convention function
Expand Down Expand Up @@ -74,6 +77,18 @@ DECLARE(spu_runtime::tr_branch) = []
return reinterpret_cast<spu_function_t>(trptr);
}();

// Trampoline to the legacy interpreter: after the GHC -> native argument shuffle,
// control is transferred to spu_recompiler_base::old_interpreter via an indirect jump.
DECLARE(spu_runtime::tr_interpreter) = []
{
	// Executable buffer for the stub (32 bytes requested with alignment argument 16)
	u8* const code = jit_runtime::alloc(32, 16);

	// Emit the argument-moving prologue; emission continues at the returned position
	u8* cursor = move_args_ghc_to_native(code);

	// jmp [rip]: opcode bytes FF 25 followed by a zero 32-bit displacement
	const u8 jmp_rip[2] = {0xff, 0x25};
	std::memcpy(cursor, jmp_rip, sizeof(jmp_rip));
	cursor += sizeof(jmp_rip);

	const u32 disp = 0;
	std::memcpy(cursor, &disp, sizeof(disp));
	cursor += sizeof(disp);

	// Absolute 64-bit destination stored immediately after the instruction
	const u64 dest = reinterpret_cast<u64>(&spu_recompiler_base::old_interpreter);
	std::memcpy(cursor, &dest, sizeof(dest));

	return reinterpret_cast<spu_function_t>(code);
}();

DECLARE(spu_runtime::g_dispatcher) = []
{
const auto ptr = reinterpret_cast<decltype(spu_runtime::g_dispatcher)>(jit_runtime::alloc(sizeof(spu_function_t), 8, false));
Expand Down Expand Up @@ -281,7 +296,8 @@ void spu_cache::add(const std::vector<u32>& func)

void spu_cache::initialize()
{
spu_runtime::g_interpreter = nullptr;
spu_runtime::g_interpreter = spu_runtime::g_gateway;
*spu_runtime::g_dispatcher = spu_runtime::tr_interpreter;

const std::string ppu_cache = Emu.PPUCache();

Expand Down Expand Up @@ -1145,6 +1161,37 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
atomic_storage<u64>::release(*reinterpret_cast<u64*>(rip), result);
}

// Legacy interpreter loop.
// Fetches big-endian 32-bit opcodes from the local storage pointed to by `ls` at spu.pc,
// dispatches each one through the handler table of the configured decoder, and advances
// spu.pc by 4 whenever the handler returns true. The loop ends once spu.check_state()
// reports true. The third argument is not used here.
// Any std::exception escaping the body pauses emulation and is logged with a thread dump.
void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* rip) try
{
	// Resolve the handler table for the configured decoder; other settings are rejected
	using table_ptr_t = decltype(&g_spu_interpreter_fast.get_table());
	table_ptr_t handlers = nullptr;

	if (g_cfg.core.spu_decoder == spu_decoder_type::precise)
	{
		handlers = &g_spu_interpreter_precise.get_table();
	}
	else if (g_cfg.core.spu_decoder == spu_decoder_type::fast)
	{
		handlers = &g_spu_interpreter_fast.get_table();
	}
	else
	{
		fmt::throw_exception<std::logic_error>("Invalid SPU decoder");
	}

	// Byte view of the local storage
	const u8* const ls_bytes = static_cast<const u8*>(ls);

	for (;;)
	{
		// Only consult check_state() when some state flag is raised
		if (UNLIKELY(spu.state) && spu.check_state())
		{
			break;
		}

		// Fetch, decode and execute one instruction
		const u32 opcode = *reinterpret_cast<const be_t<u32>*>(ls_bytes + spu.pc);
		const auto handler = (*handlers)[spu_decode(opcode)];

		if (handler(spu, {opcode}))
		{
			spu.pc += 4;
		}
	}
}
catch (const std::exception& e)
{
	Emu.Pause();
	LOG_FATAL(GENERAL, "%s thrown: %s", typeid(e).name(), e.what());
	LOG_NOTICE(GENERAL, "\n%s", spu.dump());
}

const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
{
// Result: addr + raw instruction data
Expand Down Expand Up @@ -4713,8 +4760,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

static void interp_check(spu_thread* _spu, bool after)
{
static const spu_decoder<spu_interpreter_fast> s_dec;

static thread_local std::array<v128, 128> s_gpr;

if (!after)
Expand All @@ -4724,7 +4769,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

// Execute interpreter instruction
const u32 op = *reinterpret_cast<const be_t<u32>*>(_spu->_ptr<u8>(0) + _spu->pc);
if (!s_dec.decode(op)(*_spu, {op}))
if (!g_spu_interpreter_fast.decode(op)(*_spu, {op}))
LOG_FATAL(SPU, "Bad instruction" HERE);

// Swap state
Expand Down
6 changes: 6 additions & 0 deletions rpcs3/Emu/Cell/SPURecompiler.h
Expand Up @@ -70,6 +70,9 @@ class spu_runtime
// Trampoline to spu_recompiler_base::branch
static const spu_function_t tr_branch;

// Trampoline to legacy interpreter
static const spu_function_t tr_interpreter;

public:
spu_runtime();

Expand Down Expand Up @@ -356,6 +359,9 @@ class spu_recompiler_base
// Target for the unresolved patch point (second arg is unused)
static void branch(spu_thread&, void*, u8* rip);

// Legacy interpreter loop
static void old_interpreter(spu_thread&, void* ls, u8*);

// Get the function data at specified address
const std::vector<u32>& analyse(const be_t<u32>* ls, u32 lsa);

Expand Down
36 changes: 3 additions & 33 deletions rpcs3/Emu/Cell/SPUThread.cpp
Expand Up @@ -71,10 +71,6 @@ static FORCE_INLINE void mov_rdata(decltype(spu_thread::rdata)& dst, const declt
extern u64 get_timebased_time();
extern u64 get_system_time();

extern const spu_decoder<spu_interpreter_precise> g_spu_interpreter_precise;

extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast;

extern thread_local u64 g_tls_fault_spu;

template <>
Expand Down Expand Up @@ -1156,12 +1152,11 @@ void spu_thread::cpu_task()

// Print some stats
LOG_NOTICE(SPU, "Stats: Block Weight: %u (Retreats: %u);", block_counter, block_failure);
cpu_stop();
return;
}

if (spu_runtime::g_interpreter)
else
{
ASSERT(spu_runtime::g_interpreter);

while (true)
{
if (UNLIKELY(state))
Expand All @@ -1172,31 +1167,6 @@ void spu_thread::cpu_task()

spu_runtime::g_interpreter(*this, vm::_ptr<u8>(offset), nullptr);
}

cpu_stop();
return;
}

// Select opcode table
const auto& table = *(
g_cfg.core.spu_decoder == spu_decoder_type::precise ? &g_spu_interpreter_precise.get_table() :
g_cfg.core.spu_decoder == spu_decoder_type::fast ? &g_spu_interpreter_fast.get_table() :
(fmt::throw_exception<std::logic_error>("Invalid SPU decoder"), nullptr));

// LS pointer
const auto base = vm::_ptr<const u8>(offset);

while (true)
{
if (UNLIKELY(state))
{
if (check_state())
break;
}

const u32 op = *reinterpret_cast<const be_t<u32>*>(base + pc);
if (table[spu_decode(op)](*this, {op}))
pc += 4;
}

cpu_stop();
Expand Down

0 comments on commit cb5c26f

Please sign in to comment.