From ccb528a55fb669a4c2e9f1bd89fe03def9f791b6 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 25 Apr 2019 13:43:03 +0300 Subject: [PATCH] SPU LLVM: improve codegen in loops Use a trick in check_state to improve LICM pass. --- rpcs3/Emu/Cell/SPURecompiler.cpp | 56 +++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index b7d2cd5e59fc..cc6b146b57e1 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -2129,6 +2129,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator llvm::GlobalVariable* m_scale_float_to{}; llvm::GlobalVariable* m_scale_to_float{}; + // Helper for check_state + llvm::GlobalVariable* m_fake_global1{}; + llvm::MDNode* m_md_unlikely; llvm::MDNode* m_md_likely; @@ -2962,9 +2965,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_ir->CreateLoad(pstate), m_ir->getInt32(0)), _body, check, m_md_likely); m_ir->SetInsertPoint(check); m_ir->CreateStore(m_ir->getInt32(addr), spu_ptr(&spu_thread::pc)); - m_ir->CreateCondBr(call(&exec_check_state, m_thread), stop, _body, m_md_unlikely); + m_ir->CreateCondBr(m_ir->CreateLoad(m_fake_global1, true), stop, _body, m_md_unlikely); m_ir->SetInsertPoint(stop); - m_ir->CreateRetVoid(); + m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true); + m_ir->CreateBr(_body); m_ir->SetInsertPoint(_body); } @@ -3125,6 +3129,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator IRBuilder<> irb(m_context); m_ir = &irb; + // Helper for check_state. Used to not interfere with LICM pass. + m_fake_global1 = new llvm::GlobalVariable(*m_module, get_type(), false, llvm::GlobalValue::InternalLinkage, m_ir->getFalse()); + // Add entry function (contains only state/code check) const auto main_func = llvm::cast(m_module->getOrInsertFunction(hash, get_ftype()).getCallee()); const auto main_arg2 = &*(main_func->arg_begin() + 2); @@ -3508,13 +3515,54 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator pm.add(createCFGSimplificationPass()); pm.add(createNewGVNPass()); pm.add(createDeadStoreEliminationPass()); - pm.add(createLoopVersioningLICMPass()); + pm.add(createLICMPass()); pm.add(createAggressiveDCEPass()); //pm.add(createLintPass()); // Check for (const auto& func : m_functions) { - pm.run(*func.second.func); + const auto f = func.second.func; + pm.run(*f); + + for (auto& bb : *f) + { + for (auto& i : bb) + { + // Replace volatile fake load with check_state call + if (auto li = dyn_cast(&i); li && li->getOperand(0) == m_fake_global1) + { + m_ir->SetInsertPoint(bb.getTerminator()); + li->replaceAllUsesWith(call(&exec_check_state, &*f->arg_begin())); + li->eraseFromParent(); + break; + } + + // Replace volatile fake store with return + if (auto si = dyn_cast(&i); si && si->getOperand(1) == m_fake_global1) + { + const auto br = bb.getTerminator(); + + for (auto& j : *br->getSuccessor(0)) + { + // Cleanup PHI nodes if exist + if (auto phi = dyn_cast(&j)) + { + phi->removeIncomingValue(&bb, false); + } + else + { + break; + } + } + + m_ir->SetInsertPoint(bb.getTerminator()); + m_ir->CreateRetVoid(); + si->eraseFromParent(); + br->eraseFromParent(); + break; + } + } + } } // Clear context (TODO)