From 48926a7ec47fe7481b73287cd90663e5e7ca17aa Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 16 Oct 2022 22:05:45 +1000 Subject: [PATCH] x86/iR5900: Fix quadword stores on Linux Linux counts vector and GPR registers separately for which register they get passed in when calling functions. Windows uses the argument position. --- common/emitter/x86types.h | 37 ++++++++++++++++++++++++++- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 13 +++++----- pcsx2/x86/ix86-32/recVTLB.cpp | 6 ++--- 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/common/emitter/x86types.h b/common/emitter/x86types.h index 4ddfe528c05f5..9c41a52642667 100644 --- a/common/emitter/x86types.h +++ b/common/emitter/x86types.h @@ -442,6 +442,11 @@ namespace x86Emitter static const inline xRegisterSSE& GetInstance(uint id); static const inline xRegisterSSE& GetYMMInstance(uint id); + + /// Returns the register to use when calling a C function. + /// arg_number is the argument position from the left, starting with 0. + /// sse_number is the argument position relative to the number of vector registers. + static const inline xRegisterSSE& GetArgRegister(uint arg_number, uint sse_number, bool ymm = false); }; class xRegisterCL : public xRegister8 @@ -483,6 +488,11 @@ namespace x86Emitter // Returns true if the register is the stack pointer: ESP. bool IsStackPointer() const { return Id == 4; } + /// Returns the register to use when calling a C function. + /// arg_number is the argument position from the left, starting with 0. + /// gpr_number is the argument position relative to the number of general-purpose registers. 
+ static const inline xAddressReg& GetArgRegister(uint arg_number, uint gpr_number); + xAddressVoid operator+(const xAddressReg& right) const; xAddressVoid operator+(sptr right) const; xAddressVoid operator+(const void* right) const; @@ -490,7 +500,6 @@ namespace x86Emitter xAddressVoid operator-(const void* right) const; xAddressVoid operator*(int factor) const; xAddressVoid operator<<(u32 shift) const; - xAddressReg& operator=(const xAddressReg&) = default; }; // -------------------------------------------------------------------------------------- @@ -653,6 +662,32 @@ extern const xRegister32 return *m_tbl_ymmRegs[id]; } + const xRegisterSSE& xRegisterSSE::GetArgRegister(uint arg_number, uint sse_number, bool ymm) + { +#ifdef _WIN32 + // Windows passes arguments according to their position from the left. + return ymm ? GetYMMInstance(arg_number) : GetInstance(arg_number); +#else + // Linux counts the number of vector parameters. + return ymm ? GetYMMInstance(sse_number) : GetInstance(sse_number); +#endif + } + + const xAddressReg& xAddressReg::GetArgRegister(uint arg_number, uint gpr_number) + { +#ifdef _WIN32 + // Windows passes arguments according to their position from the left. + static constexpr const xAddressReg* regs[] = {&rcx, &rdx, &r8, &r9}; + pxAssert(arg_number < std::size(regs)); + return *regs[arg_number]; +#else + // Linux counts the number of GPR parameters. 
+ static constexpr const xAddressReg* regs[] = {&rdi, &rsi, &rdx, &rcx}; + pxAssert(gpr_number < std::size(regs)); + return *regs[gpr_number]; +#endif + } + // -------------------------------------------------------------------------------------- // xAddressVoid // -------------------------------------------------------------------------------------- diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 50b636f77a245..887a4532a91f2 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -200,9 +200,10 @@ static void recStore(u32 bits) else { _flushEEreg(_Rt_); // flush register to mem - int rpreg = _allocTempXMMreg(XMMT_INT, 1); - xMOVAPS(xRegisterSSE(rpreg), ptr128[&cpuRegs.GPR.r[_Rt_].UL[0]]); - _freeXMMreg(rpreg); + + const xRegisterSSE& dreg = xRegisterSSE::GetArgRegister(1, 0); + _freeXMMreg(dreg.GetId()); + xMOVAPS(dreg, ptr128[&cpuRegs.GPR.r[_Rt_].UL[0]]); } // Load ECX with the destination address, or issue a direct optimized write @@ -981,9 +982,9 @@ void recSQC2() skip.SetTarget(); skipvuidle.SetTarget(); - int rpreg = _allocTempXMMreg(XMMT_INT, 1); - xMOVAPS(xRegisterSSE(rpreg), ptr128[&VU0.VF[_Ft_].UD[0]]); - _freeXMMreg(rpreg); + const xRegisterSSE& dreg = xRegisterSSE::GetArgRegister(1, 0); + _freeXMMreg(dreg.GetId()); + xMOVAPS(dreg, ptr128[&VU0.VF[_Ft_].UD[0]]); if (GPR_IS_CONST1(_Rs_)) { diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 9f7696b44d57d..aa0f729fab710 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -161,7 +161,7 @@ namespace vtlb_private break; case 128: - xMOVAPS(ptr[arg1reg], xmm1); + xMOVAPS(ptr[arg1reg], xRegisterSSE::GetArgRegister(1, 0)); break; } } @@ -514,7 +514,7 @@ void vtlb_DynGenWrite_Const(u32 bits, u32 addr_const) break; case 128: - xMOVAPS(ptr128[(void*)ppf], xmm1); + xMOVAPS(ptr128[(void*)ppf], xRegisterSSE::GetArgRegister(1, 0)); break; } } @@ -534,7 +534,7 @@ void 
vtlb_DynGenWrite_Const(u32 bits, u32 addr_const) } iFlushCall(FLUSH_FULLVTLB); - xFastCall(vmv.assumeHandlerGetRaw(szidx, true), paddr, arg2reg); + xFastCall(vmv.assumeHandlerGetRaw(szidx, true), paddr); } }