Skip to content

Commit

Permalink
SPU LLVM: use branch patchpoints again
Browse files Browse the repository at this point in the history
Renewed and adapted for PIC and all branch types.
This may address performance degradation after RPCS3#5923.
  • Loading branch information
Nekotekina committed May 12, 2019
1 parent 8ef7e26 commit a38617a
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 49 deletions.
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
Expand Up @@ -954,7 +954,7 @@ void spu_recompiler::branch_fixed(u32 target)
return;
}

const auto ppptr = m_spurt->make_branch_patchpoint(target);
const auto ppptr = m_spurt->make_branch_patchpoint();

c->mov(SPU_OFF_32(pc), target);
c->xor_(qw0->r32(), qw0->r32());
Expand Down
94 changes: 48 additions & 46 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Expand Up @@ -859,7 +859,7 @@ void* spu_runtime::find(u64 last_reset_count, const std::vector<u32>& func)
return fn_location;
}

spu_function_t spu_runtime::find(const se_t<u32, false>* ls, u32 addr) const
spu_function_t spu_runtime::find(const u32* ls, u32 addr) const
{
const u64 reset_count = m_reset_count;

Expand All @@ -870,42 +870,22 @@ spu_function_t spu_runtime::find(const se_t<u32, false>* ls, u32 addr) const
return nullptr;
}

// Scratch vector
static thread_local std::vector<u32> addrv{u32{0}};

const u32 start = addr * (g_cfg.core.spu_block_size != spu_block_size_type::giga);

addrv[0] = addr;
const auto beg = m_map.lower_bound(addrv);
addrv[0] += 4;
const auto _end = m_map.lower_bound(addrv);
const auto upper = m_pic_map.upper_bound({ls + addr / 4, (0x40000 - addr) / 4});

for (auto it = beg; it != _end; ++it)
if (upper != m_pic_map.begin() && !m_pic_map.empty())
{
bool bad = false;

for (u32 i = 1; i < it->first.size(); ++i)
{
const u32 x = it->first[i];
const u32 y = ls[start / 4 + i - 1];

if (x && x != y)
{
bad = true;
break;
}
}
const auto found = std::prev(upper);

if (!bad)
if (found->first.compare(0, found->first.size(), ls + addr / 4, found->first.size()) == 0)
{
return it->second;
return found->second;
}
}

return nullptr;
}

spu_function_t spu_runtime::make_branch_patchpoint(u32 target) const
spu_function_t spu_runtime::make_branch_patchpoint() const
{
u8* const raw = jit_runtime::alloc(16, 16);

Expand All @@ -931,10 +911,8 @@ spu_function_t spu_runtime::make_branch_patchpoint(u32 target) const
const s64 rel = reinterpret_cast<u64>(tr_branch) - reinterpret_cast<u64>(raw + 8) - 5;
std::memcpy(raw + 9, &rel, 4);
raw[13] = 0xcc;

// Write compressed target address
raw[14] = target >> 2;
raw[15] = target >> 10;
raw[14] = 0;
raw[15] = 0;

return reinterpret_cast<spu_function_t>(raw);
}
Expand Down Expand Up @@ -1013,7 +991,6 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
// If code verification failed from a patched patchpoint, clear it with a dispatcher jump
if (rip)
{
const u32 target = *(u16*)(rip + 6) * 4;
const s64 rel = reinterpret_cast<u64>(spu_runtime::g_dispatcher) - reinterpret_cast<u64>(rip - 8) - 6;

union
Expand Down Expand Up @@ -1056,7 +1033,7 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
{
// Find function
const auto func = spu.jit->get_runtime().find(spu._ptr<se_t<u32, false>>(0), *(u16*)(rip + 6) * 4);
const auto func = spu.jit->get_runtime().find(static_cast<u32*>(vm::base(spu.offset)), spu.pc);

if (!func)
{
Expand Down Expand Up @@ -1089,9 +1066,8 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
bytes[5] = 0xcc;
}

// Preserve target address
bytes[6] = rip[6];
bytes[7] = rip[7];
bytes[6] = 0;
bytes[7] = 0;
}
else
{
Expand Down Expand Up @@ -3118,6 +3094,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Main entry point offset
u32 m_base;

// Module name
std::string m_hash;

// Current function (chunk)
llvm::Function* m_function;

Expand Down Expand Up @@ -3279,6 +3258,29 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Create tail call to the function chunk (non-tail calls are out of the question)
void tail_chunk(llvm::Value* chunk, llvm::Value* base_pc = nullptr)
{
if (!chunk)
{
// Create branch patchpoint if chunk == nullptr
verify(HERE), m_finfo, !m_finfo->fn;

// Register under a unique linkable name
const std::string ppname = fmt::format("%s-pp-0x%05x", m_hash, m_pos);
m_engine->addGlobalMapping(ppname, (u64)m_spurt->make_branch_patchpoint());

// Create function with not exactly correct type
const auto ppfunc = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(ppname, m_finfo->chunk->getFunctionType()).getCallee());
ppfunc->setCallingConv(m_finfo->chunk->getCallingConv());

if (true)
{
m_ir->CreateRet(m_ir->CreateBitCast(ppfunc, get_type<u8*>()));
return;
}

chunk = ppfunc;
base_pc = m_ir->getInt32(0);
}

auto call = m_ir->CreateCall(chunk, {m_thread, m_lsptr, base_pc ? base_pc : m_base_pc});
auto func = m_finfo ? m_finfo->chunk : llvm::cast<llvm::Function>(chunk);
call->setCallingConv(func->getCallingConv());
Expand Down Expand Up @@ -3467,7 +3469,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->SetInsertPoint(result);
update_pc(target);
tail_chunk(m_dispatch);
tail_chunk(nullptr);
m_ir->SetInsertPoint(cblock);
return result;
}
Expand Down Expand Up @@ -4037,7 +4039,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_cache->add(func);
}

std::string hash;
{
sha1_context ctx;
u8 output[20];
Expand All @@ -4046,16 +4047,17 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
sha1_update(&ctx, reinterpret_cast<const u8*>(func.data() + 1), func.size() * 4 - 4);
sha1_finish(&ctx, output);

fmt::append(hash, "spu-0x%05x-%s", func[0], fmt::base57(output));
m_hash.clear();
fmt::append(m_hash, "spu-0x%05x-%s", func[0], fmt::base57(output));
}

if (m_cache)
{
LOG_SUCCESS(SPU, "LLVM: Building %s (size %u)...", hash, func.size() - 1);
LOG_SUCCESS(SPU, "LLVM: Building %s (size %u)...", m_hash, func.size() - 1);
}
else
{
LOG_NOTICE(SPU, "Building function 0x%x... (size %u, %s)", func[0], func.size() - 1, hash);
LOG_NOTICE(SPU, "Building function 0x%x... (size %u, %s)", func[0], func.size() - 1, m_hash);
}

SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode);
Expand All @@ -4075,7 +4077,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (g_cfg.core.spu_debug)
{
std::string log;
fmt::append(log, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", func[0], func.size() - 1, hash);
fmt::append(log, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", func[0], func.size() - 1, m_hash);

// Disassemble if necessary
for (u32 i = 1; i < func.size(); i++)
Expand Down Expand Up @@ -4106,7 +4108,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
using namespace llvm;

// Create LLVM module
std::unique_ptr<Module> module = std::make_unique<Module>(hash + ".obj", m_context);
std::unique_ptr<Module> module = std::make_unique<Module>(m_hash + ".obj", m_context);
module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
m_module = module.get();
Expand All @@ -4119,7 +4121,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_fake_global1 = new llvm::GlobalVariable(*m_module, get_type<bool>(), false, llvm::GlobalValue::InternalLinkage, m_ir->getFalse());

// Add entry function (contains only state/code check)
const auto main_func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(hash, get_ftype<void, u8*, u8*, u64>()).getCallee());
const auto main_func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(m_hash, get_ftype<void, u8*, u8*, u64>()).getCallee());
const auto main_arg2 = &*(main_func->arg_begin() + 2);
main_func->setCallingConv(CallingConv::GHC);
set_function(main_func);
Expand Down Expand Up @@ -7713,7 +7715,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_ir->SetInsertPoint(fail);
}

tail_chunk(m_dispatch);
tail_chunk(nullptr);
m_ir->SetInsertPoint(cblock);
return result;
}
Expand Down Expand Up @@ -7852,7 +7854,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}
else
{
tail_chunk(m_dispatch);
tail_chunk(nullptr);
}
}
else
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/Emu/Cell/SPURecompiler.h
Expand Up @@ -82,10 +82,10 @@ class spu_runtime
void* find(u64 last_reset_count, const std::vector<u32>&);

// Find existing function
spu_function_t find(const se_t<u32, false>* ls, u32 addr) const;
spu_function_t find(const u32* ls, u32 addr) const;

// Generate a patchable trampoline to spu_recompiler_base::branch
spu_function_t make_branch_patchpoint(u32 target) const;
spu_function_t make_branch_patchpoint() const;

// reset() arg retriever, for race avoidance (can result in double reset)
u64 get_reset_count() const
Expand Down

0 comments on commit a38617a

Please sign in to comment.