From 425e032a62fee3ed295454bf20c8be0252de9d3b Mon Sep 17 00:00:00 2001
From: Emmanuel Gil Peyrot
Date: Wed, 20 Nov 2019 16:53:22 +0100
Subject: [PATCH] SPU: Copy with memcpy() instead of hand-rolled SSE2

In a very unscientific benchmark, spu_thread::do_dma_transfer() went
from 2.27% of my CPU time to 0.07%, while __memmove_avx_unaligned_erms()
went from 1.47% to 2.88%; added up, that is roughly 0.8% saved.
---
 rpcs3/Emu/Cell/SPUThread.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index cb9f124ef2a2..d1c58afe9159 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -1433,6 +1433,9 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 
 				auto lock = vm::passive_lock(eal & -128, ::align(eal + size, 128));
 
+#ifdef __GNUG__
+				std::memcpy(dst, src, size);
+#else
 				while (size >= 128)
 				{
 					mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
@@ -1450,6 +1453,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 					src += 16;
 					size -= 16;
 				}
+#endif
 
 				lock->release(0);
 				break;
@@ -1483,6 +1487,9 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 		}
 		default:
 		{
+#ifdef __GNUG__
+			std::memcpy(dst, src, size);
+#else
 			while (size >= 128)
 			{
 				mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
@@ -1500,6 +1507,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 				src += 16;
 				size -= 16;
 			}
+#endif
 			break;
 		}
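
Note (illustrative, not part of the patch): the sketch below shows the general shape of the trade-off this change makes, assuming plain byte buffers and hypothetical helper names (copy_sse2, copy_memcpy). The hand-rolled loop moves data in fixed 16-byte SSE2 chunks, while std::memcpy lets the C library pick a wider path at runtime, such as the AVX/ERMS variant named in the benchmark above.

// Illustrative sketch only; helper names and buffer types are assumptions.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <emmintrin.h> // SSE2 intrinsics

// Hand-rolled SSE2 copy in 16-byte chunks, similar in spirit to the loops
// moved behind the new #else branch. Assumes size is a multiple of 16.
inline void copy_sse2(std::uint8_t* dst, const std::uint8_t* src, std::size_t size)
{
	while (size >= 16)
	{
		const __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
		_mm_storeu_si128(reinterpret_cast<__m128i*>(dst), v);
		dst += 16;
		src += 16;
		size -= 16;
	}
}

// Library copy, as the patch uses under __GNUG__: glibc dispatches memcpy to
// the widest implementation available (e.g. __memmove_avx_unaligned_erms).
inline void copy_memcpy(std::uint8_t* dst, const std::uint8_t* src, std::size_t size)
{
	std::memcpy(dst, src, size);
}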