Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PPU LLVM/RawSPU: Implement MMIO violation elimination pass #14137

Merged
merged 1 commit into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion rpcs3/Emu/Cell/PPUOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,7 @@ namespace ppu_instructions

// Builds an ADDI instruction word: base pattern 0x0e << 26 with the rd, ra and simm16 fields filled in.
inline u32 ADDI(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t insn{ 0x0eu << 26 };
	insn.rd = rt;
	insn.ra = ra;
	insn.simm16 = si;
	return insn.opcode;
}

// Builds an ADDIS instruction word: base pattern 0x0f << 26 with the rd, ra and simm16 fields filled in.
inline u32 ADDIS(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t insn{ 0x0fu << 26 };
	insn.rd = rt;
	insn.ra = ra;
	insn.simm16 = si;
	return insn.opcode;
}
// Builds an XORIS instruction word: base pattern 0x1b << 26 with the rd, ra and uimm16 fields filled in.
// The immediate is taken as unsigned, like ORI/ORIS: callers pass values such as 0xe000 that do not fit a signed 16-bit immediate.
inline u32 XORIS(u32 rt, u32 ra, u32 ui) { ppu_opcode_t op{ 0x1bu << 26 }; op.rd = rt; op.ra = ra; op.uimm16 = ui; return op.opcode; }
// Builds an ORI instruction word: base pattern 0x18 << 26 with the rd, ra and uimm16 fields filled in.
inline u32 ORI(u32 rt, u32 ra, u32 ui)
{
	ppu_opcode_t insn{ 0x18u << 26 };
	insn.rd = rt;
	insn.ra = ra;
	insn.uimm16 = ui;
	return insn.opcode;
}

// Builds an ORIS instruction word: base pattern 0x19 << 26 with the rd, ra and uimm16 fields filled in.
inline u32 ORIS(u32 rt, u32 ra, u32 ui)
{
	ppu_opcode_t insn{ 0x19u << 26 };
	insn.rd = rt;
	insn.ra = ra;
	insn.uimm16 = ui;
	return insn.opcode;
}

// Builds an OR instruction word: base pattern (0x1f << 26) | (0x1bc << 1) with the rs, ra, rb and rc fields filled in.
inline u32 OR(u32 ra, u32 rs, u32 rb, bool rc = false)
{
	ppu_opcode_t insn{ (0x1fu << 26) | (0x1bcu << 1) };
	insn.rs = rs;
	insn.ra = ra;
	insn.rb = rb;
	insn.rc = rc;
	return insn.opcode;
}
Expand All @@ -685,6 +686,7 @@ namespace ppu_instructions
// Builds an MFSPR instruction word: base pattern (0x1f << 26) | (0x153 << 1) with the rd and spr fields filled in.
inline u32 MFSPR(u32 rt, u32 spr)
{
	ppu_opcode_t insn{ (0x1fu << 26) | (0x153u << 1) };
	insn.rd = rt;
	insn.spr = spr;
	return insn.opcode;
}

// Builds an MTSPR instruction word: base pattern (0x1f << 26) | (0x1d3 << 1) with the rs and spr fields filled in.
inline u32 MTSPR(u32 spr, u32 rs)
{
	ppu_opcode_t insn{ (0x1fu << 26) | (0x1d3u << 1) };
	insn.rs = rs;
	insn.spr = spr;
	return insn.opcode;
}

// Builds an LWZ instruction word: base pattern 0x20 << 26 with the rd, ra and simm16 fields filled in.
inline u32 LWZ(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t insn{ 0x20u << 26 };
	insn.rd = rt;
	insn.ra = ra;
	insn.simm16 = si;
	return insn.opcode;
}

// Builds an STW instruction word: base pattern 0x24 << 26 with the rd, ra and simm16 fields filled in.
inline u32 STW(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t insn{ 0x24u << 26 };
	insn.rd = rt;
	insn.ra = ra;
	insn.simm16 = si;
	return insn.opcode;
}

// Builds an STD instruction word: base pattern 0x3e << 26 with the rs and ra fields filled in.
// The ds field receives the displacement divided by 4.
inline u32 STD(u32 rs, u32 ra, s32 si)
{
	ppu_opcode_t insn{ 0x3eu << 26 };
	insn.rs = rs;
	insn.ra = ra;
	insn.ds = si / 4;
	return insn.opcode;
}

// Builds an STDU instruction word: same as STD but with the lowest bit of the base pattern set.
inline u32 STDU(u32 rs, u32 ra, s32 si)
{
	ppu_opcode_t insn{ (0x3eu << 26) | 1 };
	insn.rs = rs;
	insn.ra = ra;
	insn.ds = si / 4;
	return insn.opcode;
}

// Builds an LD instruction word: base pattern 0x3a << 26 with the rd and ra fields filled in.
// The ds field receives the displacement divided by 4.
inline u32 LD(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t insn{ 0x3au << 26 };
	insn.rd = rt;
	insn.ra = ra;
	insn.ds = si / 4;
	return insn.opcode;
}
Expand All @@ -697,6 +699,7 @@ namespace ppu_instructions
// Builds an STVX instruction word: base pattern (31 << 26) | (231 << 1) with the vs, ra and rb fields filled in.
inline u32 STVX(u32 vs, u32 ra, u32 rb)
{
	ppu_opcode_t insn{ (31 << 26) | (231 << 1) };
	insn.vs = vs;
	insn.ra = ra;
	insn.rb = rb;
	return insn.opcode;
}

// Builds an LFD instruction word: base pattern 50 << 26 with the frd, ra and simm16 fields filled in.
inline u32 LFD(u32 frd, u32 ra, s32 si)
{
	ppu_opcode_t insn{ 50u << 26 };
	insn.frd = frd;
	insn.ra = ra;
	insn.simm16 = si;
	return insn.opcode;
}

// Builds an LVX instruction word: base pattern (31 << 26) | (103 << 1) with the vd, ra and rb fields filled in.
inline u32 LVX(u32 vd, u32 ra, u32 rb)
{
	ppu_opcode_t insn{ (31 << 26) | (103 << 1) };
	insn.vd = vd;
	insn.ra = ra;
	insn.rb = rb;
	return insn.opcode;
}

// Returns the fixed instruction word used for EIEIO (it has no operands to fill in).
inline constexpr u32 EIEIO()
{
	constexpr u32 eieio_word = 0x7c0006ac;
	return eieio_word;
}

namespace implicts
{
Expand Down Expand Up @@ -734,7 +737,7 @@ namespace ppu_instructions
// CLRLDI shorthand: forwards to RLDICL with a zero third argument, n as the fourth and the last flag cleared.
inline u32 CLRLDI(u32 x, u32 y, u32 n)
{
	return RLDICL(x, y, 0, n, false);
}

// CLRRDI shorthand: forwards to RLDICR with a zero third argument, 63 - n as the fourth and the last flag cleared.
inline u32 CLRRDI(u32 x, u32 y, u32 n)
{
	const auto fourth_arg = 63 - n;
	return RLDICR(x, y, 0, fourth_arg, false);
}

// Returns the fixed instruction word of the trap used by this namespace.
// Declared constexpr so the value can be used in constant expressions; there must be exactly one definition of TRAP().
inline constexpr u32 TRAP() { return 0x7FE00008; } // tw 31,r0,r0
}

using namespace implicts;
Expand Down
165 changes: 165 additions & 0 deletions rpcs3/Emu/Cell/PPUThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@
#include <thread>
#include <cfenv>
#include <cctype>
#include <span>
#include <optional>

#include "util/asm.hpp"
#include "util/vm.hpp"
#include "util/v128.hpp"
Expand Down Expand Up @@ -485,6 +487,167 @@ void ppu_reservation_fallback(ppu_thread& ppu)
}
}

// Loads a 32-bit value from guest address `eal`, diverting RawSPU register reads to the owning SPU thread.
//
// vm_base: host pointer that guest addresses are added to for plain memory access.
// eal:     guest effective address to read.
//
// Addresses below RAW_SPU_BASE_ADDR, addresses whose RawSPU thread does not exist, and addresses whose
// 4 bytes end inside the first SPU_LS_SIZE bytes of a RawSPU area are read directly from memory.
// Everything else is passed to spu_thread::read_reg; a rejected offset raises an exception.
u32 ppu_read_mmio_aware_u32(u8* vm_base, u32 eal)
{
	if (eal >= RAW_SPU_BASE_ADDR)
	{
		// Offset of the access relative to the start of the RawSPU region
		const u32 area_offset = eal - RAW_SPU_BASE_ADDR;

		auto spu = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(area_offset / RAW_SPU_OFFSET));

		// True when the last byte of the access still lies within local storage
		const bool targets_ls = area_offset % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE;

		// A missing thread (access violation) or an LS access is served by the plain memory read below
		if (spu && !targets_ls)
		{
			u32 reg_value{};

			if (spu->read_reg(eal, reg_value))
			{
				// Reinterpret the register value as big-endian storage so the caller receives the swapped form
				return std::bit_cast<be_t<u32>>(reg_value);
			}

			fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
		}
	}

	// Plain memory read; the caller expects the value in swapped form
	return read_from_ptr<u32>(vm_base + eal);
}

// Stores a 32-bit value at guest address `eal`, diverting RawSPU register writes to the owning SPU thread.
//
// vm_base: host pointer that guest addresses are added to for plain memory access.
// eal:     guest effective address to write.
// value:   value to store; it is assumed to already be in swapped form.
//
// Addresses below RAW_SPU_BASE_ADDR, addresses whose RawSPU thread does not exist, and addresses whose
// 4 bytes end inside the first SPU_LS_SIZE bytes of a RawSPU area are written directly to memory.
// Everything else is passed to spu_thread::write_reg; a rejected offset raises an exception.
void ppu_write_mmio_aware_u32(u8* vm_base, u32 eal, u32 value)
{
	if (eal >= RAW_SPU_BASE_ADDR)
	{
		// Offset of the access relative to the start of the RawSPU region
		const u32 area_offset = eal - RAW_SPU_BASE_ADDR;

		auto spu = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(area_offset / RAW_SPU_OFFSET));

		// True when the last byte of the access still lies within local storage
		const bool targets_ls = area_offset % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE;

		// A missing thread (access violation) or an LS access is served by the plain memory write below
		if (spu && !targets_ls)
		{
			// Reinterpret the swapped value as big-endian storage before handing it to the register handler
			if (spu->write_reg(eal, std::bit_cast<be_t<u32>>(value)))
			{
				return;
			}

			fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
		}
	}

	// Plain memory write of the already swapped value
	write_to_ptr<u32>(vm_base + eal, value);
}

// Heuristically scans a sequence of PPU instructions for patterns that suggest RawSPU MMIO access.
// Returns true as soon as any of the following is seen:
// - an EIEIO instruction;
// - both an ADDIS/ORIS/XORIS with immediate 0xe000 (-0x2000 for ADDIS) and an ORIS/ADDIS with immediate 0x10, in either order;
// - an ADDIS/ORIS/XORIS with immediate 0xe004, 0xe005 or 0xe006 (-0x1ffc, -0x1ffb, -0x1ffa for ADDIS);
// - two distinct 16-bit immediates of LWZ/STW/ADDI instructions that are accepted as problem state register offsets.
// Returns false if the whole sequence was scanned without a match.
extern bool ppu_test_address_may_be_mmio(std::span<const be_t<u32>> insts)
{
// Distinct LWZ/STW/ADDI immediates accepted so far as register offsets
std::set<u32> reg_offsets;
bool found_raw_spu_base = false;
bool found_spu_area_offset_element = false;

for (u32 inst : insts)
{
// Common around MMIO (orders IO)
if (inst == ppu_instructions::EIEIO())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Old MacDonald Had A Farm... 😆

{
return true;
}

// Keep only the top 6 bits and the low 16 bits (immediate); the 10 bits in between are cleared
// so the comparisons below match helper encodings built with zeroed register operands
const u32 op_imm16 = (inst & 0xfc00ffff);

// RawSPU MMIO base
// 0xe0000000 is a common constant so try to find an ORIS 0x10 or ADDIS 0x10 nearby (for multiplying SPU ID by it)
if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x2000) || op_imm16 == ppu_instructions::ORIS({}, {}, 0xe000) || op_imm16 == ppu_instructions::XORIS({}, {}, 0xe000))
{
found_raw_spu_base = true;

if (found_spu_area_offset_element)
{
// Found both
return true;
}
}
else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0x10) || op_imm16 == ppu_instructions::ADDIS({}, {}, 0x10))
{
found_spu_area_offset_element = true;

if (found_raw_spu_base)
{
// Found both
return true;
}
}
// RawSPU MMIO base + problem state offset
else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffc))
{
return true;
}
else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe004))
{
return true;
}
else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe004))
{
return true;
}
// RawSPU MMIO base + problem state offset + 64k of SNR1 offset
else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffb))
{
return true;
}
else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe005))
{
return true;
}
else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe005))
{
return true;
}
// RawSPU MMIO base + problem state offset + 264k of SNR2 offset (STW allows 32K+- offset so in order to access SNR2 it needs to first add another 64k)
// SNR2 is the only currently implemented register that has its 0x80000 bit set, so it is the only one whose hardcoded access is done this way
else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffa))
{
return true;
}
else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe006))
{
return true;
}
else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe006))
{
return true;
}
// Try to detect a function that receives RawSPU problem state base pointer as an argument
else if ((op_imm16 & ~0xffff) == ppu_instructions::LWZ({}, {}, 0) ||
(op_imm16 & ~0xffff) == ppu_instructions::STW({}, {}, 0) ||
(op_imm16 & ~0xffff) == ppu_instructions::ADDI({}, {}, 0))
{
const bool is_load = (op_imm16 & ~0xffff) == ppu_instructions::LWZ({}, {}, 0);
const bool is_store = (op_imm16 & ~0xffff) == ppu_instructions::STW({}, {}, 0);
// ADDI is neither a load nor a store, so it is treated below as possibly both
const bool is_neither = !is_store && !is_load;
// Only the low 16 bits of the SNR1/SNR2 offsets are compared, and only for STW or ADDI
const bool is_snr = (is_store || is_neither) && ((op_imm16 & 0xffff) == (SPU_RdSigNotify2_offs & 0xffff) || (op_imm16 & 0xffff) == (SPU_RdSigNotify1_offs & 0xffff));

// NOTE(review): the two flags passed here presumably mean "may be a read" / "may be a write" - confirm against spu_thread
if (is_snr || spu_thread::test_is_problem_state_register_offset(op_imm16 & 0xffff, is_load || is_neither, is_store || is_neither))
{
reg_offsets.insert(op_imm16 & 0xffff);

if (reg_offsets.size() >= 2)
{
// Assume high MMIO likelihood if more than one offset appears in nearby code
// Such as common IN_MBOX + OUT_MBOX
return true;
}
}
}
}

return false;
}

struct ppu_toc_manager
{
std::unordered_map<u32, u32> toc_map;
Expand Down Expand Up @@ -3529,6 +3692,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
{ "__resupdate", reinterpret_cast<u64>(vm::reservation_update) },
{ "__resinterp", reinterpret_cast<u64>(ppu_reservation_fallback) },
{ "__escape", reinterpret_cast<u64>(+ppu_escape) },
{ "__read_maybe_mmio32", reinterpret_cast<u64>(+ppu_read_mmio_aware_u32) },
{ "__write_maybe_mmio32", reinterpret_cast<u64>(+ppu_write_mmio_aware_u32) },
};

for (u64 index = 0; index < 1024; index++)
Expand Down
Loading