Skip to content

Commit

Permalink
SPU: fix Giga mode (kinda)
Browse files Browse the repository at this point in the history
Don't scan before the entry point.
Disable stack mirror in SPU LLVM.
Improve analyser logic for holes.
  • Loading branch information
Nekotekina committed May 14, 2019
1 parent c683545 commit 62f2766
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 41 deletions.
14 changes: 7 additions & 7 deletions rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
m_pos = func[0];
m_base = func[0];
m_size = ::size32(func) * 4 - 4;
const u32 start = m_pos * (g_cfg.core.spu_block_size != spu_block_size_type::giga);
const u32 start = m_pos;
const u32 end = start + m_size;

// Create block labels
Expand Down Expand Up @@ -226,7 +226,7 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
c->vzeroupper();
}
}
else if (m_size == 8 && (g_cfg.core.spu_block_size != spu_block_size_type::giga || func[0] != 4))
else if (m_size == 8)
{
c->mov(x86::rax, static_cast<u64>(func[2]) << 32 | func[1]);
c->cmp(x86::rax, x86::qword_ptr(*ls, *pc0));
Expand All @@ -237,9 +237,9 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
c->vzeroupper();
}
}
else if (m_size == 8 || m_size == 4)
else if (m_size == 4)
{
c->cmp(x86::dword_ptr(*ls, *pc0), +func.back());
c->cmp(x86::dword_ptr(*ls, *pc0), func[1]);
c->jnz(label_diff);

if (utils::has_avx())
Expand Down Expand Up @@ -724,7 +724,7 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
// Acknowledge success and add statistics
c->add(SPU_OFF_64(block_counter), ::size32(words) / (words_align / 4));

if (g_cfg.core.spu_block_size == spu_block_size_type::giga && m_pos != start)
if (m_pos != start)
{
// Jump to the entry point if necessary
c->jmp(instr_labels[m_pos]);
Expand Down Expand Up @@ -971,7 +971,7 @@ void spu_recompiler::branch_fixed(u32 target)
return;
}

const auto ppptr = g_cfg.core.spu_block_size == spu_block_size_type::giga || !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint();
const auto ppptr = !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint();

c->lea(addr->r64(), get_pc(target));
c->mov(SPU_OFF_32(pc), *addr);
Expand Down Expand Up @@ -1088,7 +1088,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
}

// Simply external call (return or indirect call)
const auto ppptr = g_cfg.core.spu_block_size == spu_block_size_type::giga || !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint();
const auto ppptr = !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint();

if (ppptr)
{
Expand Down
82 changes: 48 additions & 34 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,12 @@ std::deque<std::vector<u32>> spu_cache::get()
break;
}

if (!size || !func[1])
{
// Skip old format Giga entries
continue;
}

result.emplace_front(std::move(func));
}

Expand Down Expand Up @@ -349,7 +355,7 @@ void spu_cache::initialize()
}

// Get data start
const u32 start = func[0] * (g_cfg.core.spu_block_size != spu_block_size_type::giga);
const u32 start = func[0];
const u32 size0 = ::size32(func);

// Initialize LS with function data only
Expand Down Expand Up @@ -448,7 +454,7 @@ bool spu_runtime::func_compare::operator()(const std::vector<u32>& lhs, const st
else if (rhs_data.empty())
return false;

if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
if (false)
{
// In Giga mode, compare instructions starting from the entry point first
lhs_data.remove_prefix(lhs_addr / 4);
Expand Down Expand Up @@ -507,31 +513,35 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
const std::vector<u32>& func = where.first;

//
const u32 _off = 1 + (func[0] / 4) * (g_cfg.core.spu_block_size == spu_block_size_type::giga);
const u32 _off = 1 + (func[0] / 4) * (false);

// Set pointer to the compiled function
where.second = compiled;

// Register function in PIC map
m_pic_map[{func.data() + _off, func.size() - _off}] = compiled;

// Prepare sorted list
m_flat_list.clear();
m_flat_list.assign(m_pic_map.cbegin(), m_pic_map.cend());

struct work
{
u32 size;
u16 from;
u16 level;
u8* rel32;
decltype(m_pic_map)::iterator beg;
decltype(m_pic_map)::iterator end;
decltype(m_flat_list)::iterator beg;
decltype(m_flat_list)::iterator end;
};

// Scratch vector
static thread_local std::vector<work> workload;

// Generate a dispatcher (übertrampoline)
const auto beg = m_pic_map.begin();
const auto _end = m_pic_map.end();
const u32 size0 = ::size32(m_pic_map);
const auto beg = m_flat_list.begin();
const auto _end = m_flat_list.end();
const u32 size0 = ::size32(m_flat_list);

if (size0 == 1)
{
Expand Down Expand Up @@ -630,6 +640,19 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
{
// Cannot split: some functions contain holes at this level
w.level++;

// Resort subrange starting from the new level
std::stable_sort(w.beg, w.end, [&](const auto& a, const auto& b)
{
std::basic_string_view<u32> lhs = a.first;
std::basic_string_view<u32> rhs = b.first;

lhs.remove_prefix(w.level);
rhs.remove_prefix(w.level);

return lhs < rhs;
});

continue;
}

Expand Down Expand Up @@ -662,7 +685,7 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
if (w.level >= w.beg->first.size() || w.level >= it->first.size())
{
// If functions cannot be compared, assume smallest function
LOG_FATAL(SPU, "Trampoline simplified at 0x%x (level=%u)", func[0], w.level);
LOG_ERROR(SPU, "Trampoline simplified at 0x%x (level=%u)", func[0], w.level);
make_jump(0xe9, w.beg->second); // jmp rel32
continue;
}
Expand All @@ -671,13 +694,13 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
const u32 x = it->first.at(w.level);

// Adjust ranges (backward)
while (it != m_pic_map.begin())
while (it != m_flat_list.begin())
{
it--;

if (w.level >= it->first.size())
{
it = m_pic_map.end();
it = m_flat_list.end();
break;
}

Expand All @@ -692,9 +715,9 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
size2++;
}

if (it == m_pic_map.end())
if (it == m_flat_list.end())
{
LOG_FATAL(SPU, "Trampoline simplified (II) at 0x%x (level=%u)", func[0], w.level);
LOG_ERROR(SPU, "Trampoline simplified (II) at 0x%x (level=%u)", func[0], w.level);
make_jump(0xe9, w.beg->second); // jmp rel32
continue;
}
Expand Down Expand Up @@ -824,7 +847,7 @@ void* spu_runtime::find(u64 last_reset_count, const std::vector<u32>& func)
}

//
const u32 _off = 1 + (func[0] / 4) * (g_cfg.core.spu_block_size == spu_block_size_type::giga);
const u32 _off = 1 + (func[0] / 4) * (false);

// Try to find PIC first
const auto found = m_pic_map.find({func.data() + _off, func.size() - _off});
Expand Down Expand Up @@ -1154,8 +1177,6 @@ const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 en

if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
{
// In Giga mode, all data starts from the address 0
lsa = 0;
}

for (u32 wi = 0, wa = workload[0]; wi < workload.size();)
Expand Down Expand Up @@ -1842,7 +1863,7 @@ const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 en
}
}

while (g_cfg.core.spu_block_size != spu_block_size_type::giga || limit < 0x40000)
while (lsa > 0 || limit < 0x40000)
{
const u32 initial_size = result.size();

Expand Down Expand Up @@ -2032,14 +2053,6 @@ const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 en
continue;
}

// Erase unreachable targets
const auto new_end = std::remove_if(it->second.begin(), it->second.end(), [&](u32 addr)
{
return addr < lsa || addr >= limit;
});

it->second.erase(new_end, it->second.end());

it++;
}

Expand Down Expand Up @@ -3013,7 +3026,7 @@ const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 en

if (f.second.good)
{
LOG_ERROR(SPU, "Function 0x%05x: calls bad function (0x%05x)", f.first, ffound->first);
LOG_ERROR(SPU, "Function 0x%05x: calls bad function (0x%05x)", f.first, call);
f.second.good = false;
}
}
Expand All @@ -3040,7 +3053,7 @@ void spu_recompiler_base::dump(std::string& out)
SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode);
dis_asm.offset = reinterpret_cast<const u8*>(result.data() + 1);

if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
if (true)
{
dis_asm.offset -= result[0];
}
Expand Down Expand Up @@ -3299,15 +3312,15 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Create tail call to the function chunk (non-tail calls are just out of question)
void tail_chunk(llvm::Value* chunk, llvm::Value* base_pc = nullptr)
{
if (!chunk && (g_cfg.core.spu_block_size == spu_block_size_type::giga || !g_cfg.core.spu_verification))
if (!chunk && !g_cfg.core.spu_verification)
{
// Disable patchpoints in some cases
// Disable patchpoints if verification is disabled
chunk = m_dispatch;
}
else if (!chunk)
{
// Create branch patchpoint if chunk == nullptr
verify(HERE), m_finfo, !m_finfo->fn;
verify(HERE), m_finfo, !m_finfo->fn || m_function == m_finfo->chunk;

// Register under a unique linkable name
const std::string ppname = fmt::format("%s-pp-0x%05x", m_hash, m_pos);
Expand Down Expand Up @@ -4111,7 +4124,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_pos = func[0];
m_base = func[0];
m_size = (func.size() - 1) * 4;
const u32 start = m_pos * (g_cfg.core.spu_block_size != spu_block_size_type::giga);
const u32 start = m_pos;
const u32 end = start + m_size;

if (g_cfg.core.spu_debug)
Expand Down Expand Up @@ -4169,7 +4182,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu32), m_ir->getInt32(func[1]));
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
}
else if (func.size() - 1 == 2 && g_cfg.core.spu_block_size != spu_block_size_type::giga)
else if (func.size() - 1 == 2)
{
const auto pu64 = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_base_pc), get_type<u64*>());
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu64), m_ir->getInt64(static_cast<u64>(func[2]) << 32 | func[1]));
Expand Down Expand Up @@ -5617,6 +5630,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_ir->CreateUnreachable();
m_ir->SetInsertPoint(next);
m_ir->CreateStore(ci, spu_ptr<u8>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::cmd));
update_pc();
call("spu_exec_mfc_cmd", &exec_mfc_cmd, m_thread);
return;
}
Expand Down Expand Up @@ -7698,7 +7712,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
const auto type = m_finfo->chunk->getFunctionType()->getPointerTo()->getPointerTo();

if (ret && g_cfg.core.spu_block_size != spu_block_size_type::safe)
if (ret && g_cfg.core.spu_block_size == spu_block_size_type::mega)
{
// Compare address stored in stack mirror with addr
const auto stack0 = eval(zext<u64>(sp) + ::offset32(&spu_thread::stack_mirror));
Expand Down Expand Up @@ -8089,7 +8103,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return;
}

if (g_cfg.core.spu_block_size != spu_block_size_type::safe && m_block_info[m_pos / 4 + 1] && m_entry_info[m_pos / 4 + 1])
if (g_cfg.core.spu_block_size == spu_block_size_type::mega && m_block_info[m_pos / 4 + 1] && m_entry_info[m_pos / 4 + 1])
{
// Store the return function chunk address at the stack mirror
const auto pfunc = add_function(m_pos + 4);
Expand Down
3 changes: 3 additions & 0 deletions rpcs3/Emu/Cell/SPURecompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ class spu_runtime
// Debug module output location
std::string m_cache_path;

// Scratch vector
std::vector<std::pair<std::basic_string_view<u32>, spu_function_t>> m_flat_list;

public:

// Trampoline to spu_recompiler_base::dispatch
Expand Down

0 comments on commit 62f2766

Please sign in to comment.