Skip to content

Commit

Permalink
- Implement the affine part of the dynamic lights in the softpoly TC …
Browse files Browse the repository at this point in the history
…SSE2 drawer
  • Loading branch information
dpjudas committed Sep 21, 2017
1 parent 94a3322 commit ca2ef80
Showing 1 changed file with 49 additions and 17 deletions.
66 changes: 49 additions & 17 deletions src/polyrenderer/drawers/poly_drawer32_sse2.h
Expand Up @@ -142,7 +142,14 @@ namespace TriScreenDrawerModes
}
}

FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const PolyLight *lights, int num_lights, __m128 worldpos, __m128 worldnormal)
// Adds a precomputed dynamic light term to an already shaded color.
//
// All three arguments are treated as eight 16-bit lanes:
//   material - the unshaded texel color the light is applied to
//   fgcolor  - the color after regular shading
//   dynlight - per-channel dynamic light intensity (callers in this file
//              clamp it to the 0..256 range before passing it in)
//
// Each lane computes fgcolor + ((material * dynlight) >> 8) and is then
// clamped to at most 255 with a signed 16-bit minimum.
FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, __m128i dynlight)
{
	// Only the low 16 bits of each product are kept; the shift is logical.
	const __m128i scaled_material = _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8);
	const __m128i lit_color = _mm_add_epi16(fgcolor, scaled_material);

	// Saturate each channel so it still fits in 8 bits when packed later.
	const __m128i channel_max = _mm_set1_epi16(255);
	return _mm_min_epi16(lit_color, channel_max);
}

FORCEINLINE __m128i VECTORCALL CalcDynamicLight(const PolyLight *lights, int num_lights, __m128 worldpos, __m128 worldnormal)
{
__m128i lit = _mm_setzero_si128();

Expand Down Expand Up @@ -189,15 +196,11 @@ namespace TriScreenDrawerModes
lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8));
}

lit = _mm_min_epi16(lit, _mm_set1_epi16(256));

fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8));
fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255));
return fgcolor;
return _mm_min_epi16(lit, _mm_set1_epi16(256));
}

template<typename ShadeModeT>
FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const PolyLight *lights, int num_lights, __m128 worldpos, __m128 worldnormal)
FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i dynlight)
{
__m128i material = fgcolor;
if (ShadeModeT::Mode == (int)ShadeMode::Simple)
Expand All @@ -224,7 +227,7 @@ namespace TriScreenDrawerModes
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8);
}

return AddLights(material, fgcolor, lights, num_lights, worldpos, worldnormal);
return AddLights(material, fgcolor, dynlight);
}

template<typename BlendT>
Expand Down Expand Up @@ -391,7 +394,6 @@ class TriScreenDrawer32

auto lights = args->uniforms->Lights();
auto num_lights = args->uniforms->NumLights();
__m128 worldpos = _mm_setzero_ps();
__m128 worldnormal = _mm_setzero_ps();

// Calculate gradients
Expand Down Expand Up @@ -472,7 +474,8 @@ class TriScreenDrawer32
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);

__m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W);
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
__m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
__m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);

ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
Expand All @@ -492,6 +495,13 @@ class TriScreenDrawer32
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;

mrcpW = _mm_set1_ps(1.0f / blockPosX.W);
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW);
__m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);
__m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3);
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff,0xffff,0,0))), _mm_set1_epi16(256)), _mm_setzero_si128());
dynlightstep = _mm_slli_epi16(dynlightstep, 1);

for (int ix = 0; ix < 4; ix++)
{
// Load bgcolor
Expand Down Expand Up @@ -535,11 +545,13 @@ class TriScreenDrawer32

// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, lights, num_lights, worldpos, worldnormal);
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);

// Store result
_mm_storel_epi64((__m128i*)(dest + ix * 2), outcolor);

dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128());
}

blockPosY.W += gradientY.W;
Expand All @@ -565,7 +577,8 @@ class TriScreenDrawer32
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);

__m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W);
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
__m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
__m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);

ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
Expand All @@ -585,6 +598,13 @@ class TriScreenDrawer32
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;

mrcpW = _mm_set1_ps(1.0f / blockPosX.W);
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW);
__m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);
__m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3);
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff, 0xffff, 0, 0))), _mm_set1_epi16(256)), _mm_setzero_si128());
dynlightstep = _mm_slli_epi16(dynlightstep, 1);

for (int x = 0; x < 4; x++)
{
// Load bgcolor
Expand Down Expand Up @@ -633,14 +653,16 @@ class TriScreenDrawer32

// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, lights, num_lights, worldpos, worldnormal);
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);

// Store result
_mm_storel_epi64((__m128i*)desttmp, outcolor);
if (mask0 & (1 << 31)) dest[x * 2] = desttmp[0];
if (mask0 & (1 << 30)) dest[x * 2 + 1] = desttmp[1];

dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128());

mask0 <<= 2;
}

Expand All @@ -665,7 +687,8 @@ class TriScreenDrawer32
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);

__m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W);
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
__m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
__m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);

ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
Expand All @@ -685,6 +708,13 @@ class TriScreenDrawer32
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;

mrcpW = _mm_set1_ps(1.0f / blockPosX.W);
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW);
__m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);
__m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3);
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff, 0xffff, 0, 0))), _mm_set1_epi16(256)), _mm_setzero_si128());
dynlightstep = _mm_slli_epi16(dynlightstep, 1);

for (int x = 0; x < 4; x++)
{
// Load bgcolor
Expand Down Expand Up @@ -733,14 +763,16 @@ class TriScreenDrawer32

// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, lights, num_lights, worldpos, worldnormal);
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);

// Store result
_mm_storel_epi64((__m128i*)desttmp, outcolor);
if (mask1 & (1 << 31)) dest[x * 2] = desttmp[0];
if (mask1 & (1 << 30)) dest[x * 2 + 1] = desttmp[1];

dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128());

mask1 <<= 2;
}

Expand Down Expand Up @@ -892,7 +924,7 @@ class RectScreenDrawer32

// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, nullptr, 0, _mm_setzero_ps(), _mm_setzero_ps());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128());
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);

// Store result
Expand Down Expand Up @@ -920,7 +952,7 @@ class RectScreenDrawer32

// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, nullptr, 0, _mm_setzero_ps(), _mm_setzero_ps());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128());
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);

// Store result
Expand Down

0 comments on commit ca2ef80

Please sign in to comment.