Skip to content

Commit

Permalink
SSE blitters: Correctly set alpha of output in AlphaBlendTwoPixels
Browse files Browse the repository at this point in the history
Match alpha behaviour of ComposeColourRGBA

See: OpenTTD/OpenTTD#10060
  • Loading branch information
JGRennison committed Sep 28, 2022
1 parent d843c8b commit 4c48643
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 14 deletions.
9 changes: 5 additions & 4 deletions src/blitter/32bpp_anim_sse4.cpp
Expand Up @@ -53,6 +53,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL
const __m128i a_cm = ALPHA_CONTROL_MASK;
const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
const __m128i a_am = ALPHA_AND_MASK;

for (int y = bp->height; y != 0; y--) {
Colour *dst = dst_line;
Expand Down Expand Up @@ -144,7 +145,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL

/* Blend colours. */
bmno_alpha_blend:
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am);
bmno_full_opacity:
_mm_storel_epi64((__m128i *) dst, srcABCD);
bmno_full_transparency:
Expand All @@ -171,7 +172,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL
} else {
srcABCD = _mm_cvtsi32_si128(src->data);
}
dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm));
dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am));
}
}
break;
Expand Down Expand Up @@ -255,7 +256,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL

/* Blend colours. */
bmcr_alpha_blend:
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am);
bmcr_full_opacity:
_mm_storel_epi64((__m128i *) dst, srcABCD);
bmcr_full_transparency:
Expand Down Expand Up @@ -288,7 +289,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL
if (src->a < 255) {
bmcr_alpha_blend_single:
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am);
}
dst->data = _mm_cvtsi128_si32(srcABCD);
}
Expand Down
28 changes: 18 additions & 10 deletions src/blitter/32bpp_sse_func.hpp
Expand Up @@ -65,27 +65,33 @@ static inline __m128i DistributeAlpha(const __m128i from, const __m128i &mask)
{
#if (SSE_VERSION == 2)
__m128i alphaAB = _mm_shufflelo_epi16(from, 0x3F); // PSHUFLW, put alpha1 in front of each rgb1
return _mm_shufflehi_epi16(alphaAB, 0x3F); // PSHUFHW, put alpha2 in front of each rgb2
alphaAB = _mm_shufflehi_epi16(alphaAB, 0x3F); // PSHUFHW, put alpha2 in front of each rgb2
alphaAB = _mm_or_si128(alphaAB, mask); // POR, set alpha fields to all 1
return _mm_xor_si128(alphaAB, mask); // PXOR, set alpha fields to 0
#else
return _mm_shuffle_epi8(from, mask);
#endif
}

GNU_TARGET(SSE_TARGET)
static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &pack_mask)
static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &pack_mask, const __m128i &alpha_mask)
{
__m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128()); // PUNPCKLBW, expand each uint8 into uint16
__m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128());

__m128i alphaAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128()); // PCMPGTW, if (alpha > 0) a++;
alphaAB = _mm_srli_epi16(alphaAB, 15);
alphaAB = _mm_add_epi16(alphaAB, srcAB);
__m128i alphaMaskAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128()); // PCMPGTW (alpha > 0) ? 0xFFFF : 0
__m128i alphaAB = _mm_srli_epi16(alphaMaskAB, 15);
alphaAB = _mm_add_epi16(alphaAB, srcAB); // if (alpha > 0) a++;
alphaAB = DistributeAlpha(alphaAB, distribution_mask);

srcAB = _mm_sub_epi16(srcAB, dstAB); // PSUBW, (r - Cr)
srcAB = _mm_mullo_epi16(srcAB, alphaAB); // PMULLW, a*(r - Cr)
srcAB = _mm_srli_epi16(srcAB, 8); // PSRLW, a*(r - Cr)/256
srcAB = _mm_add_epi16(srcAB, dstAB); // PADDW, a*(r - Cr)/256 + Cr

alphaMaskAB = _mm_and_si128(alphaMaskAB, alpha_mask); // PAND, set non alpha fields to 0
srcAB = _mm_or_si128(srcAB, alphaMaskAB); // POR, set alpha fields to 0xFFFF is src alpha was > 0

return PackUnsaturated(srcAB, pack_mask);
}

Expand Down Expand Up @@ -227,9 +233,11 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel
const MapValue *src_mv = src_mv_line;

/* Load these variables into register before loop. */
const __m128i alpha_and = ALPHA_AND_MASK;
#define ALPHA_BLEND_PARAM_3 alpha_and
#if (SSE_VERSION == 2)
const __m128i clear_hi = CLEAR_HIGH_BYTE_MASK;
#define ALPHA_BLEND_PARAM_1 clear_hi
#define ALPHA_BLEND_PARAM_1 alpha_and
#define ALPHA_BLEND_PARAM_2 clear_hi
#define DARKEN_PARAM_1 tr_nom_base
#define DARKEN_PARAM_2 tr_nom_base
Expand Down Expand Up @@ -275,15 +283,15 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel
for (uint x = (uint) effective_width / 2; x > 0; x--) {
__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
_mm_storel_epi64((__m128i*) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
_mm_storel_epi64((__m128i*) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
src += 2;
dst += 2;
}

if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
__m128i srcABCD = _mm_cvtsi32_si128(src->data);
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
}
break;

Expand Down Expand Up @@ -328,7 +336,7 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel
}

/* Blend colours. */
_mm_storel_epi64((__m128i *) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
_mm_storel_epi64((__m128i *) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
dst += 2;
src += 2;
src_mv += 2;
Expand Down Expand Up @@ -357,7 +365,7 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel
if (src->a < 255) {
bmcr_alpha_blend_single:
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3);
}
dst->data = _mm_cvtsi128_si32(srcABCD);
}
Expand Down
1 change: 1 addition & 0 deletions src/blitter/32bpp_sse_type.h
Expand Up @@ -51,6 +51,7 @@ typedef union ALIGN(16) um128i {
#define OVERBRIGHT_VALUE_MASK _mm_setr_epi8(-1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, 0, 0)
#define OVERBRIGHT_CONTROL_MASK _mm_setr_epi8( 0, 1, 0, 1, 0, 1, 7, 7, 2, 3, 2, 3, 2, 3, 7, 7)
#define TRANSPARENT_NOM_BASE _mm_setr_epi16(256, 256, 256, 256, 256, 256, 256, 256)
#define ALPHA_AND_MASK _mm_setr_epi16( 0, 0, 0, -1, 0, 0, 0, -1)

#endif /* WITH_SSE */
#endif /* BLITTER_32BPP_SSE_TYPE_H */

0 comments on commit 4c48643

Please sign in to comment.