From a38617a345ba44bab14e097d804a740cf3616db3 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 12 May 2019 03:22:14 +0300 Subject: [PATCH] SPU LLVM: use branch patchpoints again Renewed and adapted for PIC and all branch types. This may address performance degradation after #5923. --- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 2 +- rpcs3/Emu/Cell/SPURecompiler.cpp | 94 +++++++++++++------------- rpcs3/Emu/Cell/SPURecompiler.h | 4 +- 3 files changed, 51 insertions(+), 49 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 6d99461c6c1b..2217518ccecd 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -954,7 +954,7 @@ void spu_recompiler::branch_fixed(u32 target) return; } - const auto ppptr = m_spurt->make_branch_patchpoint(target); + const auto ppptr = m_spurt->make_branch_patchpoint(); c->mov(SPU_OFF_32(pc), target); c->xor_(qw0->r32(), qw0->r32()); diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 32d0dc2fa119..5b7b6d7d7c0f 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -859,7 +859,7 @@ void* spu_runtime::find(u64 last_reset_count, const std::vector& func) return fn_location; } -spu_function_t spu_runtime::find(const se_t* ls, u32 addr) const +spu_function_t spu_runtime::find(const u32* ls, u32 addr) const { const u64 reset_count = m_reset_count; @@ -870,42 +870,22 @@ spu_function_t spu_runtime::find(const se_t* ls, u32 addr) const return nullptr; } - // Scratch vector - static thread_local std::vector addrv{u32{0}}; - - const u32 start = addr * (g_cfg.core.spu_block_size != spu_block_size_type::giga); - - addrv[0] = addr; - const auto beg = m_map.lower_bound(addrv); - addrv[0] += 4; - const auto _end = m_map.lower_bound(addrv); + const auto upper = m_pic_map.upper_bound({ls + addr / 4, (0x40000 - addr) / 4}); - for (auto it = beg; it != _end; ++it) + if (upper != m_pic_map.begin() && !m_pic_map.empty()) { - bool bad = false; - - for (u32 i = 1; i < it->first.size(); ++i) - { - const u32 x = it->first[i]; - const u32 y = ls[start / 4 + i - 1]; - - if (x && x != y) - { - bad = true; - break; - } - } + const auto found = std::prev(upper); - if (!bad) + if (found->first.compare(0, found->first.size(), ls + addr / 4, found->first.size()) == 0) { - return it->second; + return found->second; } } return nullptr; } -spu_function_t spu_runtime::make_branch_patchpoint(u32 target) const +spu_function_t spu_runtime::make_branch_patchpoint() const { u8* const raw = jit_runtime::alloc(16, 16); @@ -931,10 +911,8 @@ spu_function_t spu_runtime::make_branch_patchpoint(u32 target) const const s64 rel = reinterpret_cast(tr_branch) - reinterpret_cast(raw + 8) - 5; std::memcpy(raw + 9, &rel, 4); raw[13] = 0xcc; - - // Write compressed target address - raw[14] = target >> 2; - raw[15] = target >> 10; + raw[14] = 0; + raw[15] = 0; return reinterpret_cast(raw); } @@ -1013,7 +991,6 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip) // If code verification failed from a patched patchpoint, clear it with a dispatcher jump if (rip) { - const u32 target = *(u16*)(rip + 6) * 4; const s64 rel = reinterpret_cast(spu_runtime::g_dispatcher) - reinterpret_cast(rip - 8) - 6; union @@ -1056,7 +1033,7 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip) void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip) { // Find function - const auto func = spu.jit->get_runtime().find(spu._ptr>(0), *(u16*)(rip + 6) * 4); + const auto func = spu.jit->get_runtime().find(static_cast(vm::base(spu.offset)), spu.pc); if (!func) { @@ -1089,9 +1066,8 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip) bytes[5] = 0xcc; } - // Preserve target address - bytes[6] = rip[6]; - bytes[7] = rip[7]; + bytes[6] = 0; + bytes[7] = 0; } else { @@ -3118,6 +3094,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Main entry point offset u32 m_base; + // Module name + std::string m_hash; + // Current function (chunk) llvm::Function* m_function; @@ -3279,6 +3258,29 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Create tail call to the function chunk (non-tail calls are just out of question) void tail_chunk(llvm::Value* chunk, llvm::Value* base_pc = nullptr) { + if (!chunk) + { + // Create branch patchpoint if chunk == nullptr + verify(HERE), m_finfo, !m_finfo->fn; + + // Register under a unique linkable name + const std::string ppname = fmt::format("%s-pp-0x%05x", m_hash, m_pos); + m_engine->addGlobalMapping(ppname, (u64)m_spurt->make_branch_patchpoint()); + + // Create function with not exactly correct type + const auto ppfunc = llvm::cast(m_module->getOrInsertFunction(ppname, m_finfo->chunk->getFunctionType()).getCallee()); + ppfunc->setCallingConv(m_finfo->chunk->getCallingConv()); + + if (true) + { + m_ir->CreateRet(m_ir->CreateBitCast(ppfunc, get_type())); + return; + } + + chunk = ppfunc; + base_pc = m_ir->getInt32(0); + } + auto call = m_ir->CreateCall(chunk, {m_thread, m_lsptr, base_pc ? base_pc : m_base_pc}); auto func = m_finfo ? m_finfo->chunk : llvm::cast(chunk); call->setCallingConv(func->getCallingConv()); @@ -3467,7 +3469,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto result = llvm::BasicBlock::Create(m_context, "", m_function); m_ir->SetInsertPoint(result); update_pc(target); - tail_chunk(m_dispatch); + tail_chunk(nullptr); m_ir->SetInsertPoint(cblock); return result; } @@ -4037,7 +4039,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_cache->add(func); } - std::string hash; { sha1_context ctx; u8 output[20]; @@ -4046,16 +4047,17 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator sha1_update(&ctx, reinterpret_cast(func.data() + 1), func.size() * 4 - 4); sha1_finish(&ctx, output); - fmt::append(hash, "spu-0x%05x-%s", func[0], fmt::base57(output)); + m_hash.clear(); + fmt::append(m_hash, "spu-0x%05x-%s", func[0], fmt::base57(output)); } if (m_cache) { - LOG_SUCCESS(SPU, "LLVM: Building %s (size %u)...", hash, func.size() - 1); + LOG_SUCCESS(SPU, "LLVM: Building %s (size %u)...", m_hash, func.size() - 1); } else { - LOG_NOTICE(SPU, "Building function 0x%x... (size %u, %s)", func[0], func.size() - 1, hash); + LOG_NOTICE(SPU, "Building function 0x%x... (size %u, %s)", func[0], func.size() - 1, m_hash); } SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode); @@ -4075,7 +4077,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (g_cfg.core.spu_debug) { std::string log; - fmt::append(log, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", func[0], func.size() - 1, hash); + fmt::append(log, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", func[0], func.size() - 1, m_hash); // Disassemble if necessary for (u32 i = 1; i < func.size(); i++) @@ -4106,7 +4108,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator using namespace llvm; // Create LLVM module - std::unique_ptr module = std::make_unique(hash + ".obj", m_context); + std::unique_ptr module = std::make_unique(m_hash + ".obj", m_context); module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout()); m_module = module.get(); @@ -4119,7 +4121,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_fake_global1 = new llvm::GlobalVariable(*m_module, get_type(), false, llvm::GlobalValue::InternalLinkage, m_ir->getFalse()); // Add entry function (contains only state/code check) - const auto main_func = llvm::cast(m_module->getOrInsertFunction(hash, get_ftype()).getCallee()); + const auto main_func = llvm::cast(m_module->getOrInsertFunction(m_hash, get_ftype()).getCallee()); const auto main_arg2 = &*(main_func->arg_begin() + 2); main_func->setCallingConv(CallingConv::GHC); set_function(main_func); @@ -7713,7 +7715,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->SetInsertPoint(fail); } - tail_chunk(m_dispatch); + tail_chunk(nullptr); m_ir->SetInsertPoint(cblock); return result; } @@ -7852,7 +7854,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } else { - tail_chunk(m_dispatch); + tail_chunk(nullptr); } } else diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 944c1f71d831..b9f488d6cf20 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -82,10 +82,10 @@ class spu_runtime void* find(u64 last_reset_count, const std::vector&); // Find existing function - spu_function_t find(const se_t* ls, u32 addr) const; + spu_function_t find(const u32* ls, u32 addr) const; // Generate a patchable trampoline to spu_recompiler_base::branch - spu_function_t make_branch_patchpoint(u32 target) const; + spu_function_t make_branch_patchpoint() const; // reset() arg retriever, for race avoidance (can result in double reset) u64 get_reset_count() const