Skip to content

Commit

Permalink
gsdx sw: factorize common draw scanline code
Browse files Browse the repository at this point in the history
Ymm inherits from Xmm, so duplicating the code is unnecessary

Add a parameter to alltrue to test the correct register
  • Loading branch information
gregory38 committed Nov 24, 2016
1 parent 211c774 commit e3bfa2b
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 112 deletions.
102 changes: 18 additions & 84 deletions plugins/GSdx/GSDrawScanlineCodeGenerator.cpp
Expand Up @@ -122,82 +122,7 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key
}
}

#if _M_SSE >= 0x501

void GSDrawScanlineCodeGenerator::modulate16(const Ymm& a, const Operand& f, int shift)
{
if(shift == 0)
{
vpmulhrsw(a, f);
}
else
{
vpsllw(a, (uint8)(shift + 1));
vpmulhw(a, f);
}
}

// Emits a packed 16-bit linear interpolation, leaving the result in `a`:
//   a = b + modulate16(a - b, f, shift)
//
// a     - first endpoint on entry, interpolated result on exit
// b     - second endpoint (only read)
// f     - interpolation factor, forwarded to modulate16
// shift - forwarded unchanged to modulate16
void GSDrawScanlineCodeGenerator::lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift)
{
// a -= b
vpsubw(a, b);
// a = scaled product of (a - b) and f
modulate16(a, f, shift);
// a += b
vpaddw(a, b);
}

// Emits a packed 16-bit linear interpolation that uses a low multiply followed
// by an arithmetic right shift of 4, leaving the result in `a`:
//   a = b + (((a - b) * f) >> 4)
//
// a - first endpoint on entry, interpolated result on exit
// b - second endpoint (only read)
// f - interpolation factor (only read)
void GSDrawScanlineCodeGenerator::lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f)
{
// a -= b
vpsubw(a, b);
// a = low 16 bits of a * f
vpmullw(a, f);
// signed shift right by 4
vpsraw(a, 4);
// a += b
vpaddw(a, b);
}

// Emits a word blend of `b` into `a` with the immediate mask 0xaa (binary
// 10101010): the words whose mask bit is set are taken from `b`, the others
// keep their value from `a`.
//
// a    - destination; supplies the words whose mask bit is clear
// b    - supplies the words whose mask bit is set
// temp - not used by this implementation
void GSDrawScanlineCodeGenerator::mix16(const Ymm& a, const Ymm& b, const Ymm& temp)
{
vpblendw(a, b, 0xaa);
}

// Emits code that saturates every signed 16-bit word of `a` to the unsigned
// byte range and widens it back to a 16-bit word, in place.
//
// a    - register clamped in place
// temp - not used by this implementation
void GSDrawScanlineCodeGenerator::clamp16(const Ymm& a, const Ymm& temp)
{
// Pack words to bytes with unsigned saturation (this is the actual clamp).
vpackuswb(a, a);
// The 256-bit pack works on each 128-bit half separately, so the packed
// bytes end up in quadwords 0 and 2. Swap quadwords 1 and 2 to gather them
// into the low 128 bits before widening.
vpermq(a, a, _MM_SHUFFLE(3, 1, 2, 0)); // this sucks
// Zero-extend the packed bytes back to 16-bit words.
vpmovzxbw(a, a);
}

// Emits a test of ymm7 followed by a near jump to the "step" label that is
// taken when the most significant bit of every byte in ymm7 is set.
// The emitted code overwrites eax and the CPU flags.
void GSDrawScanlineCodeGenerator::alltrue()
{
// Collect the top bit of each of the 32 bytes into eax.
vpmovmskb(eax, ymm7);
// All 32 bits set means every byte had its top bit set.
cmp(eax, 0xffffffff);
je("step", T_NEAR);
}

// Emits a bitwise select, leaving the result in `a`:
//   a = (b & mask) | (a & ~mask)
// The emitted code overwrites both `b` and `mask` as scratch.
//
// a    - value kept where mask bits are clear; receives the result
// b    - value taken where mask bits are set (clobbered)
// mask - bit mask (clobbered)
void GSDrawScanlineCodeGenerator::blend(const Ymm& a, const Ymm& b, const Ymm& mask)
{
// b &= mask
vpand(b, mask);
// mask = ~mask & a
vpandn(mask, a);
// a = b | mask
vpor(a, b, mask);
}

// Emits the same bitwise select as blend(), but leaves the result in `b`:
//   b = (b & mask) | (a & ~mask)
// The emitted code overwrites `mask` as scratch; `a` is only read.
//
// b    - value taken where mask bits are set; receives the result
// a    - value taken where mask bits are clear
// mask - bit mask (clobbered)
void GSDrawScanlineCodeGenerator::blendr(const Ymm& b, const Ymm& a, const Ymm& mask)
{
// b &= mask
vpand(b, mask);
// mask = ~mask & a
vpandn(mask, a);
// b |= mask
vpor(b, mask);
}

// Emits a per-byte variable blend into `a`: bytes whose selector byte has its
// top bit set are taken from `b`, the others keep their value from `a`.
// The selector is passed as register 0.
// NOTE(review): the selector is spelled xmm0 although the operands are Ymm;
// presumably only the register index is encoded, so this acts as ymm0 - confirm.
//
// a - destination; supplies the bytes that are not selected
// b - supplies the selected bytes
void GSDrawScanlineCodeGenerator::blend8(const Ymm& a, const Ymm& b)
{
vpblendvb(a, a, b, xmm0);
}

// Emits a per-byte variable blend into `b`: bytes whose selector byte has its
// top bit set keep their value from `b`, the others are taken from `a`.
// The selector is passed as register 0.
// NOTE(review): the selector is spelled xmm0 although the operands are Ymm;
// presumably only the register index is encoded, so this acts as ymm0 - confirm.
//
// b - destination; supplies the selected bytes
// a - supplies the bytes that are not selected
void GSDrawScanlineCodeGenerator::blend8r(const Ymm& b, const Ymm& a)
{
vpblendvb(b, a, b, xmm0);
}

#else

void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int shift)
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uint8 shift)
{
if(g_cpu.has(util::Cpu::tAVX))
{
Expand Down Expand Up @@ -226,7 +151,7 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int
}
}

void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift)
void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift)
{
if(g_cpu.has(util::Cpu::tAVX))
{
Expand Down Expand Up @@ -288,6 +213,15 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
if(g_cpu.has(util::Cpu::tAVX))
{
vpackuswb(a, a);

#if _M_SSE >= 0x501
// Greg: why ?
if(g_cpu.has(util::Cpu::tAVX2)) {
ASSERT(a.isYMM());
vpermq(Ymm(a.getIdx()), Ymm(a.getIdx()), _MM_SHUFFLE(3, 1, 2, 0)); // this sucks
}
#endif

vpmovzxbw(a, a);
}
else
Expand All @@ -306,18 +240,20 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
}
}

void GSDrawScanlineCodeGenerator::alltrue()
void GSDrawScanlineCodeGenerator::alltrue(const Xmm& test)
{
uint32 mask = test.isYMM() ? 0xffffffff : 0xffff;

if(g_cpu.has(util::Cpu::tAVX))
{
vpmovmskb(eax, xmm7);
cmp(eax, 0xffff);
vpmovmskb(eax, test);
cmp(eax, mask);
je("step", T_NEAR);
}
else
{
pmovmskb(eax, xmm7);
cmp(eax, 0xffff);
pmovmskb(eax, test);
cmp(eax, mask);
je("step", T_NEAR);
}
}
Expand Down Expand Up @@ -416,5 +352,3 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const
psrlw(h, 8);
}
}

#endif
21 changes: 5 additions & 16 deletions plugins/GSdx/GSDrawScanlineCodeGenerator.h
Expand Up @@ -71,17 +71,6 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void ReadTexel(int pixels, int mip_offset = 0);
void ReadTexel(const Ymm& dst, const Ymm& addr, uint8 i);

void modulate16(const Ymm& a, const Operand& f, int shift);
void lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift);
void lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f);
void mix16(const Ymm& a, const Ymm& b, const Ymm& temp);
void clamp16(const Ymm& a, const Ymm& temp);
void alltrue();
void blend(const Ymm& a, const Ymm& b, const Ymm& mask);
void blendr(const Ymm& b, const Ymm& a, const Ymm& mask);
void blend8(const Ymm& a, const Ymm& b);
void blend8r(const Ymm& b, const Ymm& a);

#else

void Generate_SSE();
Expand Down Expand Up @@ -138,20 +127,20 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void ReadTexel_AVX(int pixels, int mip_offset = 0);
void ReadTexel_AVX(const Xmm& dst, const Xmm& addr, uint8 i);

void modulate16(const Xmm& a, const Operand& f, int shift);
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift);
#endif

void modulate16(const Xmm& a, const Operand& f, uint8 shift);
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift);
void lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f);
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
void clamp16(const Xmm& a, const Xmm& temp);
void alltrue();
void alltrue(const Xmm& test);
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
void blend8(const Xmm& a, const Xmm& b);
void blend8r(const Xmm& b, const Xmm& a);
void split16_2x8(const Xmm& l, const Xmm& h, const Xmm& src);

#endif

public:
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);

Expand Down
6 changes: 3 additions & 3 deletions plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp
Expand Up @@ -727,7 +727,7 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2)
break;
}

alltrue();
alltrue(_test);
}
}

Expand Down Expand Up @@ -1337,7 +1337,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_AVX()
case AFAIL_KEEP:
// test |= t;
vpor(_test, xmm1);
alltrue();
alltrue(_test);
break;

case AFAIL_FB_ONLY:
Expand Down Expand Up @@ -1509,7 +1509,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX()

vpor(_test, xmm1);

alltrue();
alltrue(_test);
}

void GSDrawScanlineCodeGenerator::WriteMask_AVX()
Expand Down
6 changes: 3 additions & 3 deletions plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp
Expand Up @@ -689,7 +689,7 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2)
break;
}

alltrue();
alltrue(xmm7);
}
}

Expand Down Expand Up @@ -2130,7 +2130,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_AVX()
case AFAIL_KEEP:
// test |= t;
vpor(xmm7, xmm1);
alltrue();
alltrue(xmm7);
break;

case AFAIL_FB_ONLY:
Expand Down Expand Up @@ -2313,7 +2313,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX()

vpor(xmm7, xmm1);

alltrue();
alltrue(xmm7);
}

void GSDrawScanlineCodeGenerator::WriteMask_AVX()
Expand Down
6 changes: 3 additions & 3 deletions plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx2.cpp
Expand Up @@ -691,7 +691,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2)
break;
}

alltrue();
alltrue(ymm7);
}
}

Expand Down Expand Up @@ -2118,7 +2118,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
case AFAIL_KEEP:
// test |= t;
vpor(ymm7, ymm1);
alltrue();
alltrue(ymm7);
break;

case AFAIL_FB_ONLY:
Expand Down Expand Up @@ -2309,7 +2309,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()

vpor(ymm7, ymm1);

alltrue();
alltrue(ymm7);
}

void GSDrawScanlineCodeGenerator::WriteMask()
Expand Down
6 changes: 3 additions & 3 deletions plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp
Expand Up @@ -694,7 +694,7 @@ void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2)
break;
}

alltrue();
alltrue(xmm7);
}
}

Expand Down Expand Up @@ -2162,7 +2162,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_SSE()
case AFAIL_KEEP:
// test |= t;
por(xmm7, xmm1);
alltrue();
alltrue(xmm7);
break;

case AFAIL_FB_ONLY:
Expand Down Expand Up @@ -2344,7 +2344,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_SSE()

por(xmm7, xmm1);

alltrue();
alltrue(xmm7);
}

void GSDrawScanlineCodeGenerator::WriteMask_SSE()
Expand Down

0 comments on commit e3bfa2b

Please sign in to comment.