diff --git a/Utilities/types.h b/Utilities/types.h index 728bfb9313a6..855791bcf1fa 100644 --- a/Utilities/types.h +++ b/Utilities/types.h @@ -46,7 +46,7 @@ #define ASSUME(...) do { if (!(__VA_ARGS__)) __builtin_unreachable(); } while (0) // note: the compiler will generate code to evaluate "cond" if the expression is opaque #endif -#define SAFE_BUFFERS +#define SAFE_BUFFERS __attribute__((no_stack_protector)) #define NEVER_INLINE __attribute__((noinline)) #define FORCE_INLINE __attribute__((always_inline)) inline #define RESTRICT __restrict__ diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 90df5dbfb075..ff6e69f6f4e9 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -5,6 +5,7 @@ #include "Emu/system_config.h" #include "PPUThread.h" #include "Utilities/asm.h" +#include "Utilities/sysinfo.h" #include "Emu/Cell/Common.h" #include @@ -21,6 +22,8 @@ #define SSSE3_FUNC __attribute__((__target__("ssse3"))) #endif +const bool s_use_ssse3 = utils::has_ssse3(); + inline u64 dup32(u32 x) { return x | static_cast(x) << 32; } // Write values to CR field @@ -123,7 +126,7 @@ extern __m128 sse_log2_ps(__m128 A) return _mm_add_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(x5, x6), x7), x4), _c), _mm_add_ps(_mm_mul_ps(x4, _c), x8)); } -extern __m128i sse_pshufb(__m128i data, __m128i index) +extern SAFE_BUFFERS __m128i sse_pshufb(__m128i data, __m128i index) { v128 m = v128::fromV(_mm_and_si128(index, _mm_set1_epi8(0xf))); v128 a = v128::fromV(data); @@ -146,7 +149,7 @@ extern SSSE3_FUNC __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C) return _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb)); } -extern __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C) +extern SAFE_BUFFERS __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C) { __m128i ab[2]{B, A}; v128 index = v128::fromV(_mm_andnot_si128(C, _mm_set1_epi8(0x1f))); @@ -1427,15 +1430,11 @@ bool ppu_interpreter::VOR(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter_precise::VPERM(ppu_thread& ppu, ppu_opcode_t op) -{ - ppu.vr[op.vd].vi = sse_altivec_vperm_v0(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi); - return true; -} - -bool ppu_interpreter_fast::VPERM(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter::VPERM(ppu_thread& ppu, ppu_opcode_t op) { - ppu.vr[op.vd].vi = sse_altivec_vperm(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi); + ppu.vr[op.vd].vi = s_use_ssse3 + ? sse_altivec_vperm(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi) + : sse_altivec_vperm_v0(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi); return true; } @@ -3959,17 +3958,10 @@ bool ppu_interpreter::DIVW(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter_precise::LVLX(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter::LVLX(ppu_thread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; - ppu.vr[op.vd].vi = sse_cellbe_lvlx_v0(addr); - return true; -} - -bool ppu_interpreter_fast::LVLX(ppu_thread& ppu, ppu_opcode_t op) -{ - const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; - ppu.vr[op.vd].vi = sse_cellbe_lvlx(addr); + ppu.vr[op.vd].vi = s_use_ssse3 ? sse_cellbe_lvlx(addr) : sse_cellbe_lvlx_v0(addr); return true; } @@ -4030,17 +4022,10 @@ bool ppu_interpreter::SRD(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter_precise::LVRX(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter::LVRX(ppu_thread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; - ppu.vr[op.vd].vi = sse_cellbe_lvrx_v0(addr); - return true; -} - -bool ppu_interpreter_fast::LVRX(ppu_thread& ppu, ppu_opcode_t op) -{ - const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; - ppu.vr[op.vd].vi = sse_cellbe_lvrx(addr); + ppu.vr[op.vd].vi = s_use_ssse3 ? sse_cellbe_lvrx(addr) : sse_cellbe_lvrx_v0(addr); return true; } @@ -4105,17 +4090,10 @@ bool ppu_interpreter::LFDUX(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter_precise::STVLX(ppu_thread& ppu, ppu_opcode_t op) -{ - const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; - sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs].vi); - return true; -} - -bool ppu_interpreter_fast::STVLX(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter::STVLX(ppu_thread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; - sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi); + s_use_ssse3 ? sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi) : sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs].vi); return true; } @@ -4160,17 +4138,10 @@ bool ppu_interpreter::STFSX(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter_precise::STVRX(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter::STVRX(ppu_thread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; - sse_cellbe_stvrx_v0(addr, ppu.vr[op.vs].vi); - return true; -} - -bool ppu_interpreter_fast::STVRX(ppu_thread& ppu, ppu_opcode_t op) -{ - const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; - sse_cellbe_stvrx(addr, ppu.vr[op.vs].vi); + s_use_ssse3 ? sse_cellbe_stvrx(addr, ppu.vr[op.vs].vi) : sse_cellbe_stvrx_v0(addr, ppu.vr[op.vs].vi); return true; } @@ -4227,12 +4198,7 @@ bool ppu_interpreter::STFDUX(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter_precise::LVLXL(ppu_thread& ppu, ppu_opcode_t op) -{ - return LVLX(ppu, op); -} - -bool ppu_interpreter_fast::LVLXL(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter::LVLXL(ppu_thread& ppu, ppu_opcode_t op) { return LVLX(ppu, op); } @@ -4282,12 +4248,7 @@ bool ppu_interpreter::SRAD(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter_precise::LVRXL(ppu_thread& ppu, ppu_opcode_t op) -{ - return LVRX(ppu, op); -} - -bool ppu_interpreter_fast::LVRXL(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter::LVRXL(ppu_thread& ppu, ppu_opcode_t op) { return LVRX(ppu, op); } @@ -4324,12 +4285,7 @@ bool ppu_interpreter::EIEIO(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter_precise::STVLXL(ppu_thread& ppu, ppu_opcode_t op) -{ - return STVLX(ppu, op); -} - -bool ppu_interpreter_fast::STVLXL(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter::STVLXL(ppu_thread& ppu, ppu_opcode_t op) { return STVLX(ppu, op); } @@ -4348,12 +4304,7 @@ bool ppu_interpreter::EXTSH(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter_precise::STVRXL(ppu_thread& ppu, ppu_opcode_t op) -{ - return STVRX(ppu, op); -} - -bool ppu_interpreter_fast::STVRXL(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter::STVRXL(ppu_thread& ppu, ppu_opcode_t op) { return STVRX(ppu, op); } diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 14e0ba08ef89..39e96c92eb20 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -76,6 +76,7 @@ struct ppu_interpreter static bool VNMSUBFP(ppu_thread&, ppu_opcode_t); static bool VNOR(ppu_thread&, ppu_opcode_t); static bool VOR(ppu_thread&, ppu_opcode_t); + static bool VPERM(ppu_thread&, ppu_opcode_t); static bool VPKPX(ppu_thread&, ppu_opcode_t); static bool VPKUHUM(ppu_thread&, ppu_opcode_t); static bool VPKUWUM(ppu_thread&, ppu_opcode_t); @@ -328,12 +329,6 @@ struct ppu_interpreter static bool FCTIDZ(ppu_thread&, ppu_opcode_t); static bool FCFID(ppu_thread&, ppu_opcode_t); - static bool UNK(ppu_thread&, ppu_opcode_t); -}; - -struct ppu_interpreter_precise final : ppu_interpreter -{ - static bool VPERM(ppu_thread&, ppu_opcode_t); static bool LVLX(ppu_thread&, ppu_opcode_t); static bool LVLXL(ppu_thread&, ppu_opcode_t); static bool LVRX(ppu_thread&, ppu_opcode_t); @@ -343,6 +338,11 @@ struct ppu_interpreter_precise final : ppu_interpreter static bool STVRX(ppu_thread&, ppu_opcode_t); static bool STVRXL(ppu_thread&, ppu_opcode_t); + static bool UNK(ppu_thread&, ppu_opcode_t); +}; + +struct ppu_interpreter_precise final : ppu_interpreter +{ static bool VPKSHSS(ppu_thread&, ppu_opcode_t); static bool VPKSHUS(ppu_thread&, ppu_opcode_t); static bool VPKSWSS(ppu_thread&, ppu_opcode_t); @@ -400,16 +400,6 @@ struct ppu_interpreter_precise final : ppu_interpreter struct ppu_interpreter_fast final : ppu_interpreter { - static bool VPERM(ppu_thread&, ppu_opcode_t); - static bool LVLX(ppu_thread&, ppu_opcode_t); - static bool LVLXL(ppu_thread&, ppu_opcode_t); - static bool LVRX(ppu_thread&, ppu_opcode_t); - static bool LVRXL(ppu_thread&, ppu_opcode_t); - static bool STVLX(ppu_thread&, ppu_opcode_t); - static bool STVLXL(ppu_thread&, ppu_opcode_t); - static bool STVRX(ppu_thread&, ppu_opcode_t); - static bool STVRXL(ppu_thread&, ppu_opcode_t); - static bool VPKSHSS(ppu_thread&, ppu_opcode_t); static bool VPKSHUS(ppu_thread&, ppu_opcode_t); static bool VPKSWSS(ppu_thread&, ppu_opcode_t); diff --git a/rpcs3/Emu/Cell/PPUOpcodes.h b/rpcs3/Emu/Cell/PPUOpcodes.h index 8f6ff4abd9cd..f1af487b4df4 100644 --- a/rpcs3/Emu/Cell/PPUOpcodes.h +++ b/rpcs3/Emu/Cell/PPUOpcodes.h @@ -574,12 +574,6 @@ class ppu_decoder }); } - template - ppu_decoder(F&& init) : ppu_decoder() - { - init(m_table); - } - const std::array& get_table() const { return m_table; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 9768bf8502e8..3a5e1065f3f3 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -101,57 +101,8 @@ void fmt_class_string::format(std::string& out, u64 arg) }); } -// Table of identical interpreter functions when precise contains SSE2 version, and fast contains SSSE3 functions -const std::pair s_ppu_dispatch_table[] -{ -#define FUNC(x) {&ppu_interpreter_precise::x, &ppu_interpreter_fast::x} - FUNC(VPERM), - FUNC(LVLX), - FUNC(LVLXL), - FUNC(LVRX), - FUNC(LVRXL), - FUNC(STVLX), - FUNC(STVLXL), - FUNC(STVRX), - FUNC(STVRXL), -#undef FUNC -}; - -static const ppu_decoder g_ppu_interpreter_precise([](auto& table) -{ - if (s_use_ssse3) - { - for (auto& func : table) - { - for (const auto& pair : s_ppu_dispatch_table) - { - if (pair.first == func) - { - func = pair.second; - break; - } - } - } - } -}); - -static const ppu_decoder g_ppu_interpreter_fast([](auto& table) -{ - if (!s_use_ssse3) - { - for (auto& func : table) - { - for (const auto& pair : s_ppu_dispatch_table) - { - if (pair.second == func) - { - func = pair.first; - break; - } - } - } - } -}); +constexpr ppu_decoder g_ppu_interpreter_precise; +constexpr ppu_decoder g_ppu_interpreter_fast; extern void ppu_initialize(); extern void ppu_initialize(const ppu_module& info); diff --git a/rpcs3/main.cpp b/rpcs3/main.cpp index fb5147c72e2a..d5d7b9b0d81c 100644 --- a/rpcs3/main.cpp +++ b/rpcs3/main.cpp @@ -412,6 +412,7 @@ int main(int argc, char** argv) parser.addOption(QCommandLineOption(arg_styles, "Lists the available styles.")); parser.addOption(QCommandLineOption(arg_style, "Loads a custom style.", "style", "")); parser.addOption(QCommandLineOption(arg_stylesheet, "Loads a custom stylesheet.", "path", "")); + parser.addOption(QCommandLineOption(arg_updating, "For internal usage.")); parser.process(app->arguments()); // Don't start up the full rpcs3 gui if we just want the version or help.