Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #9876: MacBook Touch Bar crash / render issues w/ 32bpp graphics #10060

Merged
merged 3 commits into from
Oct 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/blitter/32bpp_anim_sse4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL
const __m128i a_cm = ALPHA_CONTROL_MASK;
const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
const __m128i a_am = ALPHA_AND_MASK;

for (int y = bp->height; y != 0; y--) {
Colour *dst = dst_line;
Expand Down Expand Up @@ -144,7 +145,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL

/* Blend colours. */
bmno_alpha_blend:
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am);
bmno_full_opacity:
_mm_storel_epi64((__m128i *) dst, srcABCD);
bmno_full_transparency:
Expand All @@ -171,7 +172,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL
} else {
srcABCD = _mm_cvtsi32_si128(src->data);
}
dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm));
dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am));
}
}
break;
Expand Down Expand Up @@ -255,7 +256,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL

/* Blend colours. */
bmcr_alpha_blend:
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am);
bmcr_full_opacity:
_mm_storel_epi64((__m128i *) dst, srcABCD);
bmcr_full_transparency:
Expand Down Expand Up @@ -288,7 +289,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL
if (src->a < 255) {
bmcr_alpha_blend_single:
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am);
}
dst->data = _mm_cvtsi128_si32(srcABCD);
}
Expand Down Expand Up @@ -367,6 +368,12 @@ IGNORE_UNINITIALIZED_WARNING_STOP
*/
void Blitter_32bppSSE4_Anim::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom)
{
if (_screen_disable_anim) {
/* This means our output is not to the screen, so we can't be doing any animation stuff, so use our parent Draw() */
Blitter_32bppSSE4::Draw(bp, mode, zoom);
return;
}

const Blitter_32bppSSE_Base::SpriteFlags sprite_flags = ((const Blitter_32bppSSE_Base::SpriteData *) bp->sprite)->flags;
switch (mode) {
default: {
Expand Down
5 changes: 3 additions & 2 deletions src/blitter/32bpp_anim_sse4.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#define MARGIN_NORMAL_THRESHOLD 4

/** The SSE4 32 bpp blitter with palette animation. */
class Blitter_32bppSSE4_Anim FINAL : public Blitter_32bppSSE2_Anim, public Blitter_32bppSSE_Base {
class Blitter_32bppSSE4_Anim FINAL : public Blitter_32bppSSE2_Anim, public Blitter_32bppSSE4 {
private:

public:
Expand All @@ -43,13 +43,14 @@ class Blitter_32bppSSE4_Anim FINAL : public Blitter_32bppSSE2_Anim, public Blitt
return Blitter_32bppSSE_Base::Encode(sprite, allocator);
}
const char *GetName() override { return "32bpp-sse4-anim"; }
using Blitter_32bppSSE2_Anim::LookupColourInPalette;
};

/** Factory for the SSE4 32 bpp blitter (with palette animation). */
class FBlitter_32bppSSE4_Anim: public BlitterFactory {
public:
FBlitter_32bppSSE4_Anim() : BlitterFactory("32bpp-sse4-anim", "32bpp SSE4 Blitter (palette animation)", HasCPUIDFlag(1, 2, 19)) {}
Blitter *CreateInstance() override { return new Blitter_32bppSSE4_Anim(); }
Blitter *CreateInstance() override { return static_cast<Blitter_32bppSSE2_Anim *>(new Blitter_32bppSSE4_Anim()); }
};

#endif /* WITH_SSE */
Expand Down
26 changes: 16 additions & 10 deletions src/blitter/32bpp_sse_func.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,27 +65,31 @@ static inline __m128i DistributeAlpha(const __m128i from, const __m128i &mask)
{
#if (SSE_VERSION == 2)
__m128i alphaAB = _mm_shufflelo_epi16(from, 0x3F); // PSHUFLW, put alpha1 in front of each rgb1
return _mm_shufflehi_epi16(alphaAB, 0x3F); // PSHUFHW, put alpha2 in front of each rgb2
alphaAB = _mm_shufflehi_epi16(alphaAB, 0x3F); // PSHUFHW, put alpha2 in front of each rgb2
return _mm_andnot_si128(mask, alphaAB); // PANDN, set alpha fields to 0
#else
return _mm_shuffle_epi8(from, mask);
#endif
}

GNU_TARGET(SSE_TARGET)
static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &pack_mask)
static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &pack_mask, const __m128i &alpha_mask)
{
__m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128()); // PUNPCKLBW, expand each uint8 into uint16
__m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128());

__m128i alphaAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128()); // PCMPGTW, if (alpha > 0) a++;
alphaAB = _mm_srli_epi16(alphaAB, 15);
alphaAB = _mm_add_epi16(alphaAB, srcAB);
__m128i alphaMaskAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128()); // PCMPGTW (alpha > 0) ? 0xFFFF : 0
__m128i alphaAB = _mm_sub_epi16(srcAB, alphaMaskAB); // if (alpha > 0) a++;
alphaAB = DistributeAlpha(alphaAB, distribution_mask);

srcAB = _mm_sub_epi16(srcAB, dstAB); // PSUBW, (r - Cr)
srcAB = _mm_mullo_epi16(srcAB, alphaAB); // PMULLW, a*(r - Cr)
srcAB = _mm_srli_epi16(srcAB, 8); // PSRLW, a*(r - Cr)/256
srcAB = _mm_add_epi16(srcAB, dstAB); // PADDW, a*(r - Cr)/256 + Cr

alphaMaskAB = _mm_and_si128(alphaMaskAB, alpha_mask); // PAND, set non alpha fields to 0
srcAB = _mm_or_si128(srcAB, alphaMaskAB); // POR, set alpha fields to 0xFFFF is src alpha was > 0

return PackUnsaturated(srcAB, pack_mask);
}

Expand Down Expand Up @@ -227,9 +231,11 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel
const MapValue *src_mv = src_mv_line;

/* Load these variables into register before loop. */
const __m128i alpha_and = ALPHA_AND_MASK;
#define ALPHA_BLEND_PARAM_3 alpha_and
#if (SSE_VERSION == 2)
const __m128i clear_hi = CLEAR_HIGH_BYTE_MASK;
#define ALPHA_BLEND_PARAM_1 clear_hi
#define ALPHA_BLEND_PARAM_1 alpha_and
#define ALPHA_BLEND_PARAM_2 clear_hi
#define DARKEN_PARAM_1 tr_nom_base
#define DARKEN_PARAM_2 tr_nom_base
Expand Down Expand Up @@ -275,15 +281,15 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel
for (uint x = (uint) effective_width / 2; x > 0; x--) {
__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
_mm_storel_epi64((__m128i*) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
_mm_storel_epi64((__m128i*) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
src += 2;
dst += 2;
}

if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
__m128i srcABCD = _mm_cvtsi32_si128(src->data);
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
}
break;

Expand Down Expand Up @@ -328,7 +334,7 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel
}

/* Blend colours. */
_mm_storel_epi64((__m128i *) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
_mm_storel_epi64((__m128i *) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
dst += 2;
src += 2;
src_mv += 2;
Expand Down Expand Up @@ -357,7 +363,7 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel
if (src->a < 255) {
bmcr_alpha_blend_single:
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2);
srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3);
}
dst->data = _mm_cvtsi128_si32(srcABCD);
}
Expand Down
1 change: 1 addition & 0 deletions src/blitter/32bpp_sse_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ typedef union ALIGN(16) um128i {
#define OVERBRIGHT_VALUE_MASK _mm_setr_epi8(-1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, 0, 0)
#define OVERBRIGHT_CONTROL_MASK _mm_setr_epi8( 0, 1, 0, 1, 0, 1, 7, 7, 2, 3, 2, 3, 2, 3, 7, 7)
#define TRANSPARENT_NOM_BASE _mm_setr_epi16(256, 256, 256, 256, 256, 256, 256, 256)
#define ALPHA_AND_MASK _mm_setr_epi16( 0, 0, 0, -1, 0, 0, 0, -1)

#endif /* WITH_SSE */
#endif /* BLITTER_32BPP_SSE_TYPE_H */