Skip to content

Commit

Permalink
vif: replace sse cmp code with standard cmp
Browse files Browse the repository at this point in the history
Standard instructions are faster to execute; besides, the CPU can optimize the cmp/jne pair.

SSE

  e0:	add    ecx,0x10
  e3:	cmp    eax,0x7
  e6:	jg     1b0 <void dVifUnpack<0>(unsigned char const*, bool)+0x1b0>
enter_loop:
  ec:	vpcmpeqd xmm0,xmm1,XMMWORD PTR [ecx]
  f0:	vmovmskps eax,xmm0
  f4:	cmp    eax,0x7
  f7:	jne    e0 <void dVifUnpack<0>(unsigned char const*, bool)+0xe0>

Standard cmp

  d8:	add    eax,0x10
  db:	mov    esi,DWORD PTR [eax+0xc]
  de:	test   esi,esi
  e0:	je     190 <void dVifUnpack<0>(unsigned char const*, bool)+0x190>
enter_loop:
  e6:	cmp    ecx,DWORD PTR [eax+0x4]
  e9:	jne    d8 <void dVifUnpack<0>(unsigned char const*, bool)+0xd8>
  eb:	cmp    DWORD PTR [eax+0x8],ebx
  ee:	jne    d8 <void dVifUnpack<0>(unsigned char const*, bool)+0xd8>

v2: use a reference instead of a pointer for the find parameter
  • Loading branch information
gregory38 committed Dec 18, 2016
1 parent 2320efe commit 7a33cda
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 15 deletions.
2 changes: 1 addition & 1 deletion pcsx2/x86/newVif_Dynarec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ _vifT static __fi u8* dVifsetVUptr(uint cl, uint wl, u8 num, bool isFill) {

_vifT __fi uptr dVifCompile(nVifBlock& key) {
nVifStruct& v = nVif[idx];
nVifBlock* block = v.vifBlocks.find(&key);
nVifBlock* block = v.vifBlocks.find(key);

// Cache hit
if (likely(block != nullptr))
Expand Down
23 changes: 9 additions & 14 deletions pcsx2/x86/newVif_HashBucket.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,18 @@ class HashBucket {

~HashBucket() throw() { clear(); }

__fi nVifBlock* find(nVifBlock* dataPtr) {
const __m128i* chainpos = (__m128i*)m_bucket[dataPtr->hash_key];

const __m128i data128( _mm_load_si128((__m128i*)dataPtr) );

int result;
do {
// This inline SSE code is generally faster than using emitter code, since it inlines nicely. --air
result = _mm_movemask_ps( _mm_castsi128_ps( _mm_cmpeq_epi32( data128, _mm_load_si128(chainpos) ) ) );
// startPtr doesn't match (aka not nullptr) hence 4th bit must be 0
if (result == 0x7) return (nVifBlock*)chainpos;
__fi nVifBlock* find(const nVifBlock& dataPtr) {
nVifBlock* chainpos = m_bucket[dataPtr.hash_key];

chainpos += sizeof(nVifBlock) / sizeof(__m128i);
while (true) {
if (chainpos->key0 == dataPtr.key0 && chainpos->key1 == dataPtr.key1)
return chainpos;

} while(result < 0x8);
if (chainpos->startPtr == 0)
return nullptr;

return nullptr;
chainpos++;
}
}

void add(const nVifBlock& dataPtr) {
Expand Down

0 comments on commit 7a33cda

Please sign in to comment.