From 09eb633f69b65866587f493263d2f696055aa3b5 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 15 May 2019 02:16:08 +0300 Subject: [PATCH 01/11] SPU ASMJIT: increase stack frame size It seems Windows has minimal stack frame size 0x28. --- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 7bf3bfa9f37..e52aa2b45c2 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -157,7 +157,7 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vectorpush(x86::rax); + c->sub(x86::rsp, 0x28); c->mov(pc0->r32(), SPU_OFF_32(pc)); c->cmp(SPU_OFF_32(state), 0); c->jnz(label_stop); @@ -793,7 +793,7 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vectoralign(kAlignCode, 16); c->bind(label_stop); - c->pop(x86::rax); + c->add(x86::rsp, 0x28); c->ret(); if (g_cfg.core.spu_verification) @@ -802,7 +802,7 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vectoralign(kAlignCode, 16); c->bind(label_diff); c->inc(SPU_OFF_64(block_failure)); - c->pop(x86::rax); + c->add(x86::rsp, 0x28); c->jmp(imm_ptr(spu_runtime::tr_dispatch)); } @@ -981,7 +981,7 @@ void spu_recompiler::branch_fixed(u32 target) if (ppptr) { - c->pop(x86::rax); + c->add(x86::rsp, 0x28); c->jmp(imm_ptr(ppptr)); } else @@ -1025,7 +1025,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) c->bind(fail); c->mov(SPU_OFF_32(pc), *addr); c->mov(*arg0, *cpu); - c->pop(x86::rax); + c->add(x86::rsp, 0x28); c->jmp(imm_ptr(_throw)); // Save addr in srr0 and disable interrupts @@ -1092,7 +1092,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) if (ppptr) { - c->pop(x86::rax); + c->add(x86::rsp, 0x28); c->jmp(imm_ptr(ppptr)); } else @@ -1282,7 +1282,7 @@ void spu_recompiler::get_events() c->bind(fail); c->mov(*arg0, *cpu); - c->pop(x86::rax); + c->add(x86::rsp, 0x28); c->jmp(imm_ptr(_throw)); }); @@ -1308,7 +1308,7 @@ void spu_recompiler::UNK(spu_opcode_t op) c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.opcode); c->mov(*arg0, *cpu); - c->pop(asmjit::x86::rax); + c->add(asmjit::x86::rsp, 0x28); c->jmp(asmjit::imm_ptr(gate)); m_pos = -1; } From b2d0ca83fb363496554688b6a5d3ddd892440ce9 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 15 May 2019 15:17:36 +0300 Subject: [PATCH 02/11] LLVM DSL: simplify value_t template for array --- rpcs3/Emu/CPU/CPUTranslator.h | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 493048893a2..38b5aad48a8 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -348,32 +348,13 @@ struct llvm_value_t : llvm_value_t } }; -template -struct llvm_value_t : llvm_value_t -{ - using type = T[0][N]; - using base = llvm_value_t; - using base::base; - - static constexpr bool is_int = false; - static constexpr bool is_sint = false; - static constexpr bool is_uint = false; - static constexpr bool is_float = false; - static constexpr uint is_array = N; - static constexpr uint is_vector = false; - static constexpr uint is_pointer = false; - - static llvm::Type* get_type(llvm::LLVMContext& context) - { - return llvm::ArrayType::get(llvm_value_t::get_type(context), N); - } -}; - +// u32[4][123] : array of 123 u32[4] vectors +// u32[0][123] : array of 123 u32 scalars template -struct llvm_value_t : llvm_value_t +struct llvm_value_t : llvm_value_t> { using type = T[V][N]; - using base = llvm_value_t; + using base = llvm_value_t>; using base::base; static constexpr bool is_int = false; @@ -386,7 +367,7 @@ struct llvm_value_t : llvm_value_t static llvm::Type* get_type(llvm::LLVMContext& context) { - return llvm::ArrayType::get(llvm_value_t::get_type(context), N); + return llvm::ArrayType::get(base::get_type(context), N); } }; From 16401722f12f906431dc78ee7f9d0b0b7274251f Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 15 May 2019 15:42:03 +0300 Subject: [PATCH 03/11] SPU LLVM: fix $SP passing in functions, write PC on halt Allows to skip updating $SP in optimizable functions. --- rpcs3/Emu/Cell/SPURecompiler.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 7f0e29cedae..a41e99eb5ba 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -3288,7 +3288,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { // Real function type (not equal to chunk type) // 4. $SP (only 32 bit value) - const auto func_type = get_ftype(); + const auto func_type = get_ftype(); const std::string fname = fmt::format("spu-function-0x%05x", addr); llvm::Function* fn = llvm::cast(m_module->getOrInsertFunction(fname, func_type).getCallee()); @@ -3365,7 +3365,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (!m_finfo->fn && !m_block) { lr = m_ir->CreateLoad(spu_ptr(&spu_thread::gpr, +s_reg_lr, &v128::_u32, 3)); - sp = m_ir->CreateLoad(spu_ptr(&spu_thread::gpr, +s_reg_sp, &v128::_u32, 3)); + sp = m_ir->CreateLoad(spu_ptr(&spu_thread::gpr, +s_reg_sp)); for (u32 i = 3; i < 3 + std::size(args); i++) { @@ -3375,7 +3375,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator else { lr = m_ir->CreateExtractElement(get_reg_fixed(s_reg_lr).value, 3); - sp = m_ir->CreateExtractElement(get_reg_fixed(s_reg_sp).value, 3); + sp = get_reg_fixed(s_reg_sp).value; for (u32 i = 3; i < 3 + std::size(args); i++) { @@ -3482,7 +3482,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } // Load $SP - //m_finfo->load[s_reg_sp] = m_ir->CreateVectorSplat(4, &*(fn->arg_begin() + 3)); + m_finfo->load[s_reg_sp] = &*(fn->arg_begin() + 3); // Load first args for (u32 i = 3; i < 5; i++) @@ -3920,6 +3920,11 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Don't save some registers in true functions return; } + + if (index == s_reg_sp) + { + return; + } } // Write register to the context @@ -7498,7 +7503,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { if (m_finfo && m_finfo->fn) { - if (op.rt == s_reg_lr || (op.rt >= s_reg_80 && op.rt <= s_reg_127)) + if (op.rt <= s_reg_sp || (op.rt >= s_reg_80 && op.rt <= s_reg_127)) { if (m_block->bb->reg_save_dom[op.rt] && get_reg_raw(op.rt) == m_finfo->load[op.rt]) { @@ -7525,6 +7530,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->SetInsertPoint(halt); if (m_interp_magn) m_ir->CreateStore(&*(m_function->arg_begin() + 2), spu_ptr(&spu_thread::pc))->setVolatile(true); + else + update_pc(); const auto pstatus = spu_ptr(&spu_thread::status); const auto chalt = m_ir->getInt32(SPU_STATUS_STOPPED_BY_HALT); m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, pstatus, chalt, llvm::AtomicOrdering::Release)->setVolatile(true); From adc7d966835315dc24e056a40f1bd81ed8536cd1 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 15 May 2019 16:18:13 +0300 Subject: [PATCH 04/11] SPU LLVM: simplify function prototype Pass only $3 --- rpcs3/Emu/Cell/SPURecompiler.cpp | 45 +++++++++----------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index a41e99eb5ba..6d4a5b089f2 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -3287,8 +3287,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (ffound != m_funcs.end() && ffound->second.good) { // Real function type (not equal to chunk type) - // 4. $SP (only 32 bit value) - const auto func_type = get_ftype(); + // 4. $SP + // 5. $3 + const auto func_type = get_ftype(); const std::string fname = fmt::format("spu-function-0x%05x", addr); llvm::Function* fn = llvm::cast(m_module->getOrInsertFunction(fname, func_type).getCallee()); @@ -3360,30 +3361,22 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { llvm::Value* lr{}; llvm::Value* sp{}; - llvm::Value* args[2]{}; + llvm::Value* r3{}; if (!m_finfo->fn && !m_block) { lr = m_ir->CreateLoad(spu_ptr(&spu_thread::gpr, +s_reg_lr, &v128::_u32, 3)); sp = m_ir->CreateLoad(spu_ptr(&spu_thread::gpr, +s_reg_sp)); - - for (u32 i = 3; i < 3 + std::size(args); i++) - { - args[i - 3] = m_ir->CreateLoad(spu_ptr(&spu_thread::gpr, +i)); - } + r3 = m_ir->CreateLoad(spu_ptr(&spu_thread::gpr, 3)); } else { lr = m_ir->CreateExtractElement(get_reg_fixed(s_reg_lr).value, 3); sp = get_reg_fixed(s_reg_sp).value; - - for (u32 i = 3; i < 3 + std::size(args); i++) - { - args[i - 3] = get_reg_fixed(i).value; - } + r3 = get_reg_fixed(3).value; } - const auto _call = m_ir->CreateCall(verify(HERE, fn), {m_thread, m_lsptr, m_base_pc, sp, args[0], args[1]}); + const auto _call = m_ir->CreateCall(verify(HERE, fn), {m_thread, m_lsptr, m_base_pc, sp, r3}); _call->setCallingConv(fn->getCallingConv()); @@ -3392,7 +3385,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { lr = m_ir->CreateAnd(lr, 0x3fffc); m_ir->CreateStore(lr, spu_ptr(&spu_thread::pc)); - m_ir->CreateStore(_call, spu_ptr(&spu_thread::gpr, 3)); + m_ir->CreateStore(_call, spu_ptr(&spu_thread::gpr, 3)); m_ir->CreateBr(add_block_indirect({}, value(lr))); } else if (tail) @@ -3411,24 +3404,15 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } } - for (u32 i = 3; i < 3 + std::size(args); i++) - { - m_block->reg[i] = m_ir->CreateExtractValue(_call, {i - 3}); - } + // Set result + m_block->reg[3] = _call; } } // Emit return from the real function void ret_function() { - llvm::Value* r = llvm::ConstantAggregateZero::get(get_type()); - - for (u32 i = 3; i < 5; i++) - { - r = m_ir->CreateInsertValue(r, get_reg_fixed(i).value, {i - 3}); - } - - m_ir->CreateRet(r); + m_ir->CreateRet(get_reg_fixed(3).value); } void set_function(llvm::Function* func) @@ -3485,10 +3469,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_finfo->load[s_reg_sp] = &*(fn->arg_begin() + 3); // Load first args - for (u32 i = 3; i < 5; i++) - { - m_finfo->load[i] = &*(fn->arg_begin() + i + 1); - } + m_finfo->load[3] = &*(fn->arg_begin() + 4); } } else if (m_block_info[target / 4] && m_entry_info[target / 4] && !(pred_found && m_entry == target) && (!m_finfo->fn || !m_ret_info[target / 4])) @@ -3915,7 +3896,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (m_finfo && m_finfo->fn) { - if (index == s_reg_lr || (index >= 3 && index <= 4) || (index >= s_reg_80 && index <= s_reg_127)) + if (index == s_reg_lr || index == 3 || (index >= s_reg_80 && index <= s_reg_127)) { // Don't save some registers in true functions return; From 4e75d2c2f74cd61f716722c7723659d2d7f81b72 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 15 May 2019 16:41:57 +0300 Subject: [PATCH 05/11] SPU LLVM: don't save $2 in optimized functions --- rpcs3/Emu/Cell/SPURecompiler.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 6d4a5b089f2..80d81f7dac7 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -3896,16 +3896,11 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (m_finfo && m_finfo->fn) { - if (index == s_reg_lr || index == 3 || (index >= s_reg_80 && index <= s_reg_127)) + if (index <= 3 || (index >= s_reg_80 && index <= s_reg_127)) { // Don't save some registers in true functions return; } - - if (index == s_reg_sp) - { - return; - } } // Write register to the context From 007108100edc80ba5d8d6cdd23159468327774c0 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 15 May 2019 18:44:53 +0300 Subject: [PATCH 06/11] SPU: implement spu_runtime::g_tail_escape May help to avoid gateway costs in some cases. --- rpcs3/Emu/Cell/SPURecompiler.cpp | 21 +++++++++++++++++++-- rpcs3/Emu/Cell/SPURecompiler.h | 3 +++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 80d81f7dac7..657d90ef408 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -180,8 +180,25 @@ DECLARE(spu_runtime::g_escape) = build_function_asm([](asm // Restore native stack pointer (longjmp emulation) c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&spu_thread::saved_native_sp))); - c.sub(x86::rsp, 8); - c.ret(); + + // Return to the return location + c.jmp(x86::qword_ptr(x86::rsp, -8)); +}); + +DECLARE(spu_runtime::g_tail_escape) = build_function_asm([](asmjit::X86Assembler& c, auto& args) +{ + using namespace asmjit; + + // Restore native stack pointer (longjmp emulation) + c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&spu_thread::saved_native_sp))); + + // Tail call, GHC CC (second arg) + c.mov(x86::r13, args[0]); + c.mov(x86::ebp, x86::dword_ptr(args[0], ::offset32(&spu_thread::offset))); + c.add(x86::rbp, x86::qword_ptr(args[0], ::offset32(&spu_thread::memory_base_addr))); + c.mov(x86::r12, args[2]); + c.xor_(x86::ebx, x86::ebx); + c.jmp(args[1]); }); DECLARE(spu_runtime::g_interpreter) = nullptr; diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 28f33aff007..cd9648ef5ee 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -111,6 +111,9 @@ class spu_runtime // Longjmp to the end of the gateway function (native CC) static void(*const g_escape)(spu_thread*); + // Similar to g_escape, but doing tail call to the new function. + static void(*const g_tail_escape)(spu_thread*, spu_function_t, u8*); + // Interpreter entry point static spu_function_t g_interpreter; From 43ae4b3f3315238348737c5c797d58e90d909f51 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 16 May 2019 00:51:47 +0300 Subject: [PATCH 07/11] SPU LLVM/ASMJIT: add missing PC clamping Minor fix, since it's mostly impossible to overflow. --- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 27 ++++++++++++++++++++++++++ rpcs3/Emu/Cell/SPURecompiler.cpp | 4 ++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index e52aa2b45c2..85fe1a9e43a 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -964,6 +964,7 @@ void spu_recompiler::branch_fixed(u32 target) c->cmp(SPU_OFF_32(state), 0); c->jz(local->second); c->lea(addr->r64(), get_pc(target)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(*arg0, *cpu); c->call(imm_ptr(&check_state)); @@ -974,6 +975,7 @@ void spu_recompiler::branch_fixed(u32 target) const auto ppptr = !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint(); c->lea(addr->r64(), get_pc(target)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->xor_(rip->r32(), rip->r32()); c->cmp(SPU_OFF_32(state), 0); @@ -1121,6 +1123,7 @@ void spu_recompiler::branch_set_link(u32 target) c->lea(x86::r10, x86::qword_ptr(ret)); c->mov(x86::qword_ptr(*qw1, 0), x86::r10); c->lea(x86::r10, get_pc(target)); + c->and_(x86::r10d, 0x3fffc); c->mov(x86::dword_ptr(*qw1, 8), x86::r10d); c->mov(x86::dword_ptr(*qw1, 12), pc0->r32()); @@ -1152,6 +1155,7 @@ void spu_recompiler::fall(spu_opcode_t op) }; c->lea(addr->r64(), get_pc(m_pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.opcode); c->mov(*qw0, asmjit::imm_ptr(asmjit::Internal::ptr_cast(g_spu_interpreter_fast.decode(op.opcode)))); @@ -1305,6 +1309,7 @@ void spu_recompiler::UNK(spu_opcode_t op) }; c->lea(addr->r64(), get_pc(m_pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.opcode); c->mov(*arg0, *cpu); @@ -1327,6 +1332,7 @@ void spu_recompiler::STOP(spu_opcode_t op) Label ret = c->newLabel(); c->lea(addr->r64(), get_pc(m_pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.opcode & 0x3fff); c->mov(*arg0, *cpu); @@ -1364,6 +1370,7 @@ void spu_recompiler::SYNC(spu_opcode_t op) if (g_cfg.core.spu_block_size == spu_block_size_type::safe) { c->lea(addr->r64(), get_pc(m_pos + 4)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->jmp(label_stop); m_pos = -1; @@ -1416,6 +1423,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) { c->bind(wait); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.ra); c->mov(*arg0, *cpu); @@ -1512,6 +1520,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) using ftype = void (*)(spu_thread*, v128*); c->lea(addr->r64(), get_pc(m_pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->lea(*arg1, SPU_OFF_128(gpr, op.rt)); c->mov(*arg0, *cpu); @@ -1538,6 +1547,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) { c->bind(wait); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.ra); c->mov(*arg0, *cpu); @@ -1563,6 +1573,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) } c->lea(addr->r64(), get_pc(m_pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.ra); c->mov(*arg0, *cpu); @@ -1648,6 +1659,7 @@ void spu_recompiler::RCHCNT(spu_opcode_t op) default: { c->lea(addr->r64(), get_pc(m_pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.ra); c->mov(*arg0, *cpu); @@ -2335,6 +2347,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) { c->bind(wait); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.ra); c->mov(*arg0, *cpu); @@ -2361,6 +2374,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) { c->bind(upd); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->lea(arg1->r32(), MFC_WrTagMask); c->mov(*arg0, *cpu); @@ -2384,6 +2398,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) { c->bind(fail); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.ra); c->mov(*arg0, *cpu); @@ -2451,6 +2466,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) c->mov(*addr, SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); c->mov(SPU_OFF_8(ch_mfc_cmd, &spu_mfc_cmd::cmd), addr->r8()); c->lea(addr->r64(), get_pc(m_pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(*arg0, *cpu); c->call(imm_ptr(spu_wrch_mfc)); @@ -2515,6 +2531,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) } c->lea(addr->r64(), get_pc(m_pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.ra); c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); @@ -2636,6 +2653,7 @@ void spu_recompiler::BISL(spu_opcode_t op) c->and_(*addr, 0x3fffc); const XmmLink& vr = XmmAlloc(); c->lea(*qw0, get_pc(m_pos + 4)); + c->and_(qw0->r32(), 0x3fffc); c->movd(vr, qw0->r32()); c->pslldq(vr, 12); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); @@ -2659,6 +2677,7 @@ void spu_recompiler::BISLED(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->lea(*qw0, get_pc(m_pos + 4)); + c->and_(qw0->r32(), 0x3fffc); c->movd(vr, qw0->r32()); c->pslldq(vr, 12); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); @@ -3271,6 +3290,7 @@ void spu_recompiler::HGT(spu_opcode_t op) { c->bind(label); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->lock().bts(SPU_OFF_32(status), 2); c->mov(addr->r64(), reinterpret_cast(vm::base(0xffdead00))); @@ -3612,6 +3632,7 @@ void spu_recompiler::HLGT(spu_opcode_t op) { c->bind(label); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->lock().bts(SPU_OFF_32(status), 2); c->mov(addr->r64(), reinterpret_cast(vm::base(0xffdead00))); @@ -3905,6 +3926,7 @@ void spu_recompiler::HEQ(spu_opcode_t op) { c->bind(label); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->lock().bts(SPU_OFF_32(status), 2); c->mov(addr->r64(), reinterpret_cast(vm::base(0xffdead00))); @@ -4152,6 +4174,7 @@ void spu_recompiler::BRASL(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->lea(addr->r64(), get_pc(m_pos + 4)); + c->and_(*addr, 0x3fffc); c->movd(vr, *addr); c->pslldq(vr, 12); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); @@ -4192,6 +4215,7 @@ void spu_recompiler::BRSL(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->lea(addr->r64(), get_pc(m_pos + 4)); + c->and_(*addr, 0x3fffc); c->movd(vr, *addr); c->pslldq(vr, 12); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); @@ -4430,6 +4454,7 @@ void spu_recompiler::HGTI(spu_opcode_t op) { c->bind(label); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->lock().bts(SPU_OFF_32(status), 2); c->mov(addr->r64(), reinterpret_cast(vm::base(0xffdead00))); @@ -4474,6 +4499,7 @@ void spu_recompiler::HLGTI(spu_opcode_t op) { c->bind(label); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->lock().bts(SPU_OFF_32(status), 2); c->mov(addr->r64(), reinterpret_cast(vm::base(0xffdead00))); @@ -4536,6 +4562,7 @@ void spu_recompiler::HEQI(spu_opcode_t op) { c->bind(label); c->lea(addr->r64(), get_pc(pos)); + c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->lock().bts(SPU_OFF_32(status), 2); c->mov(addr->r64(), reinterpret_cast(vm::base(0xffdead00))); diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 657d90ef408..3373f2e7e1a 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -4028,7 +4028,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Update PC for current or explicitly specified instruction address void update_pc(u32 target = -1) { - m_ir->CreateStore(get_pc(target + 1 ? target : m_pos), spu_ptr(&spu_thread::pc), true); + m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? target : m_pos), 0x3fffc), spu_ptr(&spu_thread::pc), true); } // Call cpu_thread::check_state if necessary and return or continue (full check) @@ -8096,7 +8096,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return; } - set_vr(op.rt, insert(splat(0), 3, value(get_pc(m_pos + 4)))); + set_vr(op.rt, insert(splat(0), 3, value(get_pc(m_pos + 4)) & 0x3fffc)); if (m_finfo && m_finfo->fn) { From a921af1e9608688bf59deec8f97541a9f02372bc Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 16 May 2019 00:52:52 +0300 Subject: [PATCH 08/11] SPU LLVM/ASMJIT: remove minor unnecessary code --- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 9 --------- rpcs3/Emu/Cell/SPURecompiler.cpp | 2 -- 2 files changed, 11 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 85fe1a9e43a..a4117a4319d 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -1347,15 +1347,6 @@ void spu_recompiler::STOP(spu_opcode_t op) c->jmp(label_stop); m_pos = -1; } - else - { - Label label_next = c->newLabel(); - c->cmp(SPU_OFF_32(state), 0); - c->jz(label_next); - c->mov(*arg0, *cpu); - c->call(imm_ptr(&check_state)); - c->bind(label_next); - } } void spu_recompiler::LNOP(spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 3373f2e7e1a..e37abb2dbe6 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -5124,8 +5124,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator tail_chunk(m_dispatch); return; } - - check_state(m_pos + 4); } void STOPD(spu_opcode_t op) // From b138d25b97edd0b6d1c81049f6c401fd11abdc20 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 16 May 2019 00:53:37 +0300 Subject: [PATCH 09/11] SPU Disasm: fix absolute addressing in some instructions. STQA, LQA, BRA, BRASL instructions. --- rpcs3/Emu/Cell/SPUDisAsm.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUDisAsm.h b/rpcs3/Emu/Cell/SPUDisAsm.h index 4a417dbbd7d..001c5a4ffac 100644 --- a/rpcs3/Emu/Cell/SPUDisAsm.h +++ b/rpcs3/Emu/Cell/SPUDisAsm.h @@ -724,7 +724,7 @@ class SPUDisAsm final : public PPCDisAsm } void STQA(spu_opcode_t op) { - DisAsm("stqa", spu_reg_name[op.rt], DisAsmBranchTarget(op.i16)); + DisAsm("stqa", spu_reg_name[op.rt], DisAsmBranchTarget(op.i16 - dump_pc / 4)); } void BRNZ(spu_opcode_t op) { @@ -744,15 +744,15 @@ class SPUDisAsm final : public PPCDisAsm } void BRA(spu_opcode_t op) { - DisAsm("bra", DisAsmBranchTarget(op.i16)); + DisAsm("bra", DisAsmBranchTarget(op.i16 - dump_pc / 4)); } void LQA(spu_opcode_t op) { - DisAsm("lqa", spu_reg_name[op.rt], DisAsmBranchTarget(op.i16)); + DisAsm("lqa", spu_reg_name[op.rt], DisAsmBranchTarget(op.i16 - dump_pc / 4)); } void BRASL(spu_opcode_t op) { - DisAsm("brasl", spu_reg_name[op.rt], DisAsmBranchTarget(op.i16)); + DisAsm("brasl", spu_reg_name[op.rt], DisAsmBranchTarget(op.i16 - dump_pc / 4)); } void BR(spu_opcode_t op) { @@ -908,7 +908,7 @@ class SPUDisAsm final : public PPCDisAsm //0 - 6 void HBRA(spu_opcode_t op) { - DisAsm("hbra", DisAsmBranchTarget((op.r0h << 7) | op.rt), DisAsmBranchTarget(op.i16)); + DisAsm("hbra", DisAsmBranchTarget((op.r0h << 7) | op.rt), DisAsmBranchTarget(op.i16 - dump_pc / 4)); } void HBRR(spu_opcode_t op) { From f95ec8a37c64b45665e8199c4b99a7dc6a149784 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 16 May 2019 00:54:50 +0300 Subject: [PATCH 10/11] SPU LLVM: simplify jump table computation Remove one add operation and adjust constants instead. --- rpcs3/Emu/Cell/SPURecompiler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index e37abb2dbe6..71a6ea4ba6a 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7817,7 +7817,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (!op.d && !op.e && tfound != m_targets.end() && tfound->second.size() > 1) { // Shift aligned address for switch - const auto addrfx = m_ir->CreateAdd(m_ir->CreateSub(addr.value, m_base_pc), m_ir->getInt32(m_base)); + const auto addrfx = m_ir->CreateSub(addr.value, m_base_pc); const auto sw_arg = m_ir->CreateLShr(addrfx, 2, "", true); // Initialize jump table targets @@ -7860,12 +7860,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (found != targets.end()) { - sw->addCase(m_ir->getInt32(pos / 4), found->second); + sw->addCase(m_ir->getInt32(pos / 4 - m_base / 4), found->second); continue; } } - sw->addCase(m_ir->getInt32(pos / 4), sw->getDefaultDest()); + sw->addCase(m_ir->getInt32(pos / 4 - m_base / 4), sw->getDefaultDest()); } // Exit function on unexpected target From 91897fa69d92ad7e45bb83c011232de65eef70f7 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 16 May 2019 02:41:31 +0300 Subject: [PATCH 11/11] SPU LLVM/ASMJIT: fix BRA/BRASL instructions for PIC Handle absolute branch addressing correctly. --- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 51 ++++++--- rpcs3/Emu/Cell/SPUASMJITRecompiler.h | 2 +- rpcs3/Emu/Cell/SPURecompiler.cpp | 143 +++++++++++++------------ 3 files changed, 110 insertions(+), 86 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index a4117a4319d..91dfe259680 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -952,7 +952,7 @@ static void check_state(spu_thread* _spu) } } -void spu_recompiler::branch_fixed(u32 target) +void spu_recompiler::branch_fixed(u32 target, bool absolute) { using namespace asmjit; @@ -961,6 +961,15 @@ void spu_recompiler::branch_fixed(u32 target) if (local != instr_labels.end() && local->second.isValid()) { + Label fail; + + if (absolute) + { + fail = c->newLabel(); + c->cmp(pc0->r32(), m_base); + c->jne(fail); + } + c->cmp(SPU_OFF_32(state), 0); c->jz(local->second); c->lea(addr->r64(), get_pc(target)); @@ -969,14 +978,30 @@ void spu_recompiler::branch_fixed(u32 target) c->mov(*arg0, *cpu); c->call(imm_ptr(&check_state)); c->jmp(local->second); - return; + + if (absolute) + { + c->bind(fail); + } + else + { + return; + } } const auto ppptr = !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint(); - c->lea(addr->r64(), get_pc(target)); - c->and_(*addr, 0x3fffc); - c->mov(SPU_OFF_32(pc), *addr); + if (absolute) + { + c->mov(SPU_OFF_32(pc), target); + } + else + { + c->lea(addr->r64(), get_pc(target)); + c->and_(*addr, 0x3fffc); + c->mov(SPU_OFF_32(pc), *addr); + } + c->xor_(rip->r32(), rip->r32()); c->cmp(SPU_OFF_32(state), 0); c->jnz(label_stop); @@ -4132,11 +4157,8 @@ void spu_recompiler::BRA(spu_opcode_t op) { const u32 target = spu_branch_target(0, op.i16); - if (target != m_pos + 4) - { - branch_fixed(target); - m_pos = -1; - } + branch_fixed(target, true); + m_pos = -1; } void spu_recompiler::LQA(spu_opcode_t op) @@ -4170,12 +4192,9 @@ void spu_recompiler::BRASL(spu_opcode_t op) c->pslldq(vr, 12); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - if (target != m_pos + 4) - { - branch_set_link(m_pos + 4); - branch_fixed(target); - m_pos = -1; - } + branch_set_link(m_pos + 4); + branch_fixed(target, true); + m_pos = -1; } void spu_recompiler::BR(spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h index f69b68e56f4..a736844c379 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h @@ -90,7 +90,7 @@ class spu_recompiler : public spu_recompiler_base asmjit::X86Mem XmmConst(__m128i data); asmjit::X86Mem get_pc(u32 addr); - void branch_fixed(u32 target); + void branch_fixed(u32 target, bool absolute = false); void branch_indirect(spu_opcode_t op, bool jt = false, bool ret = true); void branch_set_link(u32 target); void fall(spu_opcode_t op); diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 71a6ea4ba6a..e52d9228c41 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -1344,14 +1344,7 @@ const std::vector& spu_recompiler_base::analyse(const be_t* ls, u32 en { const u32 target = spu_branch_target(av); - if (target == pos + 4) - { - LOG_WARNING(SPU, "[0x%x] At 0x%x: indirect branch to next%s", result[0], pos, op.d ? " (D)" : op.e ? " (E)" : ""); - } - else - { - LOG_WARNING(SPU, "[0x%x] At 0x%x: indirect branch to 0x%x", result[0], pos, target); - } + LOG_WARNING(SPU, "[0x%x] At 0x%x: indirect branch to 0x%x%s", result[0], pos, target, op.d ? " (D)" : op.e ? " (E)" : ""); m_targets[pos].push_back(target); @@ -1368,11 +1361,7 @@ const std::vector& spu_recompiler_base::analyse(const be_t* ls, u32 en } else { - if (op.d || op.e) - { - m_entry_info[target / 4] = true; - } - + m_entry_info[target / 4] = true; add_block(target); } } @@ -1578,7 +1567,7 @@ const std::vector& spu_recompiler_base::analyse(const be_t* ls, u32 en vflags[op.rt] = +vf::is_const; values[op.rt] = pos + 4; - if (target == pos + 4) + if (type == spu_itype::BRSL && target == pos + 4) { // Get next instruction address idiom break; @@ -1616,14 +1605,39 @@ const std::vector& spu_recompiler_base::analyse(const be_t* ls, u32 en break; } - case spu_itype::BR: case spu_itype::BRA: + { + const u32 target = spu_branch_target(0, op.i16); + + if (g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync) + { + m_entry_info[target / 4] = true; + add_block(target); + } + else + { + if (g_cfg.core.spu_block_size == spu_block_size_type::giga) + { + LOG_NOTICE(SPU, "[0x%x] At 0x%x: ignoring fixed tail call to 0x%x (SYNC)", result[0], pos, target); + } + + if (target > entry_point) + { + limit = std::min(limit, target); + } + } + + next_block(); + break; + } + + case spu_itype::BR: case spu_itype::BRZ: case spu_itype::BRNZ: case spu_itype::BRHZ: case spu_itype::BRHNZ: { - const u32 target = spu_branch_target(type == spu_itype::BRA ? 0 : pos, op.i16); + const u32 target = spu_branch_target(pos, op.i16); if (target == pos + 4) { @@ -1634,7 +1648,7 @@ const std::vector& spu_recompiler_base::analyse(const be_t* ls, u32 en m_targets[pos].push_back(target); add_block(target); - if (type != spu_itype::BR && type != spu_itype::BRA) + if (type != spu_itype::BR) { m_targets[pos].push_back(pos + 4); add_block(pos + 4); @@ -2193,6 +2207,10 @@ const std::vector& spu_recompiler_base::analyse(const be_t* ls, u32 en case spu_itype::BRASL: is_call = spu_branch_target(0, op.i16) != ia + 4; break; + case spu_itype::BRA: + is_call = true; + is_tail = true; + break; case spu_itype::BISL: case spu_itype::BISLED: is_call = true; @@ -2779,21 +2797,19 @@ const std::vector& spu_recompiler_base::analyse(const be_t* ls, u32 en switch (last_inst) { case spu_itype::BR: - case spu_itype::BRA: case spu_itype::BRNZ: case spu_itype::BRZ: case spu_itype::BRHNZ: case spu_itype::BRHZ: case spu_itype::BRSL: - case spu_itype::BRASL: { - const u32 target = spu_branch_target(last_inst == spu_itype::BRA || last_inst == spu_itype::BRASL ? 0 : tia, op.i16); + const u32 target = spu_branch_target(tia, op.i16); if (target == tia + 4) { bb.terminator = term_type::fallthrough; } - else if (last_inst != spu_itype::BRSL && last_inst != spu_itype::BRASL) + else if (last_inst != spu_itype::BRSL) { // No-op terminator or simple branch instruction bb.terminator = term_type::br; @@ -2815,6 +2831,12 @@ const std::vector& spu_recompiler_base::analyse(const be_t* ls, u32 en break; } + case spu_itype::BRA: + case spu_itype::BRASL: + { + bb.terminator = term_type::indirect_call; + break; + } case spu_itype::BI: { if (op.d || op.e || bb.targets.size() == 1) @@ -3449,7 +3471,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } // Add block with current block as a predecessor - llvm::BasicBlock* add_block(u32 target) + llvm::BasicBlock* add_block(u32 target, bool absolute = false) { // Check the predecessor const bool pred_found = m_block_info[target / 4] && m_preds[target].find_first_of(m_pos) + 1; @@ -3497,6 +3519,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->SetInsertPoint(result); const auto pfinfo = add_function(target); + if (absolute) + { + verify(HERE), !m_finfo->fn; + + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto fail = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_base_pc, m_ir->getInt32(m_base)), next, fail); + m_ir->SetInsertPoint(fail); + m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc), true); + tail_chunk(nullptr); + m_ir->SetInsertPoint(next); + } + if (pfinfo->fn) { // Tail call to the real function @@ -3525,12 +3560,25 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto cblock = m_ir->GetInsertBlock(); const auto result = llvm::BasicBlock::Create(m_context, "", m_function); m_ir->SetInsertPoint(result); - update_pc(target); + + if (absolute) + { + verify(HERE), !m_finfo->fn; + + m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc), true); + } + else + { + update_pc(target); + } + tail_chunk(nullptr); m_ir->SetInsertPoint(cblock); return result; } + verify(HERE), !absolute; + auto& result = m_blocks[target].block; if (!result) @@ -7639,34 +7687,11 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return result; } - // Convert an indirect branch into a static one if possible - if (const auto _int = llvm::dyn_cast(addr.value); _int && op.opcode) + if (llvm::isa(addr.value)) { - const u32 target = ::narrow(_int->getZExtValue(), HERE); - - LOG_WARNING(SPU, "[0x%x] Fixed branch to 0x%x", m_pos, target); - - if (!op.e && !op.d) - { - return add_block(target); - } - - if (!m_entry_info[target / 4]) - { - LOG_ERROR(SPU, "[0x%x] Fixed branch to 0x%x", m_pos, target); - } - else - { - add_function(target); - } - // Fixed branch excludes the possibility it's a function return (TODO) ret = false; } - else if (llvm::isa(addr.value) && op.opcode) - { - LOG_ERROR(SPU, "[0x%x] Unexpected constant (add_block_indirect)", m_pos); - } if (m_finfo && m_finfo->fn && op.opcode) { @@ -8011,33 +8036,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const u32 target = spu_branch_target(0, op.i16); - if (target != m_pos + 4) - { - m_block->block_end = m_ir->GetInsertBlock(); - m_ir->CreateBr(add_block(target)); - } + m_block->block_end = m_ir->GetInsertBlock(); + m_ir->CreateBr(add_block(target, true)); } void BRASL(spu_opcode_t op) // { set_link(op); - - const u32 target = spu_branch_target(0, op.i16); - - if (m_finfo && m_finfo->fn && target != m_pos + 4) - { - if (auto fn = add_function(target)->fn) - { - call_function(fn); - return; - } - else - { - LOG_FATAL(SPU, "[0x%x] Can't add function 0x%x", m_pos, target); - return; - } - } - BRA(op); }