Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Color profile check YUV2RGB matrix #7384

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 10 additions & 9 deletions libfreerdp/primitives/prim_YUV.c
Original file line number Diff line number Diff line change
Expand Up @@ -460,9 +460,9 @@ static pstatus_t general_YUV444ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const
}
}
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
* | R | ( | 298 0 409 | | Y - 16 | )
* | G | = ( | 298 -100 -208 | | U - 128 | ) >> 8
* | B | ( | 298 516 0 | | V - 128 | )
*/
static pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3],
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
Expand Down Expand Up @@ -589,23 +589,24 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const
}

/**
* | Y | ( | 54 183 18 | | R | ) | 0 |
* | U | = ( | -29 -99 128 | | G | ) >> 8 + | 128 |
* | V | ( | 128 -116 -12 | | B | ) | 128 |
* | Y | ( ( | 66 129 25 | | R | ) | 128 | ) | 16 |
* | U | = ( ( | -38 -74 112 | | G | ) + | 128 | ) >> 8 + | 128 |
* | V | ( ( | 112 -94 -18 | | B | ) | 128 | ) | 128 |
*/

/*
 * Computes the luma (Y) byte for one RGB pixel using fixed-point integer
 * arithmetic (weights scaled by 256, undone with >> 8).
 *
 * NOTE(review): this text looks like a rendered diff with the +/- markers
 * stripped, so two return statements appear back to back. Read as plain C,
 * only the first return executes and the second is unreachable - confirm
 * which formula is the intended one before relying on this block.
 */
static INLINE BYTE RGB2Y(BYTE R, BYTE G, BYTE B)
{
/* Weights 54/183/18 sum to 255; no rounding term and no offset is added. */
return (54 * R + 183 * G + 18 * B) >> 8;
/* Weights 66/129/25 with +128 added before >> 8 (round to nearest), then a +16 offset. */
return ((66lu * R + 129lu * G + 25lu * B + 128lu) >> 8lu) + 16lu;
}

/*
 * Computes the U chroma byte for one RGB pixel using fixed-point integer
 * arithmetic, re-centred by adding 128 after the >> 8.
 *
 * The coefficients are unsigned literals, so the negative terms wrap in
 * unsigned arithmetic rather than producing a negative value; the result is
 * then narrowed to the BYTE return type.
 * (presumably BYTE is an 8-bit unsigned type - confirm against its typedef)
 *
 * NOTE(review): this text looks like a rendered diff with the +/- markers
 * stripped, so two return statements appear back to back. Read as plain C,
 * only the first return executes and the second is unreachable - confirm
 * which formula is the intended one.
 */
static INLINE BYTE RGB2U(BYTE R, BYTE G, BYTE B)
{
/* Weights -29/-99/128, no rounding term. */
return ((-29u * R - 99u * G + 128u * B) >> 8u) + 128u;
/* Weights -38/-74/112 with +128 added before >> 8 (round to nearest). */
return ((-38lu * R - 74lu * G + 112lu * B + 128lu) >> 8lu) + 128lu;
}

/*
 * Computes the V chroma byte for one RGB pixel using fixed-point integer
 * arithmetic, re-centred by adding 128 after the >> 8.
 *
 * Unlike RGB2Y/RGB2U, the parameters here are declared INT32 rather than
 * BYTE; multiplying by the unsigned long literals converts them to unsigned
 * before the arithmetic, and the result is narrowed to the BYTE return type.
 * NOTE(review): the parameter type difference may be unintentional - confirm.
 *
 * NOTE(review): this text looks like a rendered diff with the +/- markers
 * stripped, so two return statements appear back to back. Read as plain C,
 * only the first return executes and the second is unreachable - confirm
 * which formula is the intended one.
 */
static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B)
{
/* Weights 128/-116/-12, no rounding term. */
return ((128lu * R - 116lu * G - 12lu * B) >> 8lu) + 128lu;
/* Weights 112/-94/-18 with +128 added before >> 8 (round to nearest). */
return ((112lu * R - 94lu * G - 18lu * B + 128lu) >> 8lu) + 128lu;
}

static pstatus_t general_RGBToYUV444_8u_P3AC4R(const BYTE* pSrc, UINT32 SrcFormat,
Expand Down
73 changes: 41 additions & 32 deletions libfreerdp/primitives/prim_YUV_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,19 @@

static primitives_t* generic = NULL;

/**
* | R | ( | 298 0 409 | | Y - 16 | )
* | G | = ( | 298 -100 -208 | | U - 128 | ) >> 8
* | B | ( | 298 516 0 | | V - 128 | )
*/

static INLINE uint8x8_t neon_YUV2R(int32x4_t Ch, int32x4_t Cl, int16x4_t Dh, int16x4_t Dl,
int16x4_t Eh, int16x4_t El)
{
/* R = (256 * Y + 403 * (V - 128)) >> 8 */
const int16x4_t c403 = vdup_n_s16(403);
const int32x4_t CEh = vmlal_s16(Ch, Eh, c403);
const int32x4_t CEl = vmlal_s16(Cl, El, c403);
/* R = (298 * (Y - 16) + 409 * (V - 128)) >> 8 */
const int16x4_t c409 = { 409, 409, 409, 409 };
const int32x4_t CEh = vmlal_s16(Ch, Eh, c409);
const int32x4_t CEl = vmlal_s16(Cl, El, c409);
const int32x4_t Rh = vrshrq_n_s32(CEh, 8);
const int32x4_t Rl = vrshrq_n_s32(CEl, 8);
const int16x8_t R = vcombine_s16(vqmovn_s32(Rl), vqmovn_s32(Rh));
Expand All @@ -55,13 +61,13 @@ static INLINE uint8x8_t neon_YUV2R(int32x4_t Ch, int32x4_t Cl, int16x4_t Dh, int
static INLINE uint8x8_t neon_YUV2G(int32x4_t Ch, int32x4_t Cl, int16x4_t Dh, int16x4_t Dl,
int16x4_t Eh, int16x4_t El)
{
/* G = (256L * Y - 48 * (U - 128) - 120 * (V - 128)) >> 8 */
const int16x4_t c48 = vdup_n_s16(48);
const int16x4_t c120 = vdup_n_s16(120);
const int32x4_t CDh = vmlsl_s16(Ch, Dh, c48);
const int32x4_t CDl = vmlsl_s16(Cl, Dl, c48);
const int32x4_t CDEh = vmlsl_s16(CDh, Eh, c120);
const int32x4_t CDEl = vmlsl_s16(CDl, El, c120);
/* G = (298 * (Y - 16) - 100 * (U - 128) - 208 * (V - 128)) >> 8 */
const int16x4_t c100 = { 100, 100, 100, 100 };
const int16x4_t c208 = { 208, 208, 208, 208 };
const int32x4_t CDh = vmlsl_s16(Ch, Dh, c100);
const int32x4_t CDl = vmlsl_s16(Cl, Dl, c100);
const int32x4_t CDEh = vmlsl_s16(CDh, Eh, c208);
const int32x4_t CDEl = vmlsl_s16(CDl, El, c208);
const int32x4_t Gh = vrshrq_n_s32(CDEh, 8);
const int32x4_t Gl = vrshrq_n_s32(CDEl, 8);
const int16x8_t G = vcombine_s16(vqmovn_s32(Gl), vqmovn_s32(Gh));
Expand All @@ -71,10 +77,10 @@ static INLINE uint8x8_t neon_YUV2G(int32x4_t Ch, int32x4_t Cl, int16x4_t Dh, int
static INLINE uint8x8_t neon_YUV2B(int32x4_t Ch, int32x4_t Cl, int16x4_t Dh, int16x4_t Dl,
int16x4_t Eh, int16x4_t El)
{
/* B = (256L * Y + 475 * (U - 128)) >> 8*/
const int16x4_t c475 = vdup_n_s16(475);
const int32x4_t CDh = vmlal_s16(Ch, Dh, c475);
const int32x4_t CDl = vmlal_s16(Ch, Dl, c475);
/* B = (298 * (Y - 16) + 516 * (U - 128)) >> 8*/
const int16x4_t c516 = { 516, 516, 516, 516 };
const int32x4_t CDh = vmlal_s16(Ch, Dh, c516);
const int32x4_t CDl = vmlal_s16(Ch, Dl, c516);
const int32x4_t Bh = vrshrq_n_s32(CDh, 8);
const int32x4_t Bl = vrshrq_n_s32(CDl, 8);
const int16x8_t B = vcombine_s16(vqmovn_s32(Bl), vqmovn_s32(Bh));
Expand All @@ -86,40 +92,43 @@ static INLINE BYTE* neon_YuvToRgbPixel(BYTE* pRGB, int16x8_t Y, int16x8_t D, int
const uint8_t aPos)
{
uint8x8x4_t bgrx;
const int32x4_t Ch = vmulq_n_s32(vmovl_s16(vget_high_s16(Y)), 256); /* Y * 256 */
const int32x4_t Cl = vmulq_n_s32(vmovl_s16(vget_low_s16(Y)), 256); /* Y * 256 */
const int16x4_t c16 = { 16, 16, 16, 16 };
const int16x4_t Yh = vsub_s16(vget_high_s16(Y), c16);
const int16x4_t Yl = vsub_s16(vget_low_s16(Y), c16);
const int32x4_t Ch = vmulq_n_s32(vmovl_s16(Yh), 298); /* Y * 298 */
const int32x4_t Cl = vmulq_n_s32(vmovl_s16(Yl), 298); /* Y * 298 */
const int16x4_t Dh = vget_high_s16(D);
const int16x4_t Dl = vget_low_s16(D);
const int16x4_t Eh = vget_high_s16(E);
const int16x4_t El = vget_low_s16(E);
{
/* B = (256L * Y + 475 * (U - 128)) >> 8*/
const int16x4_t c475 = vdup_n_s16(475);
const int32x4_t CDh = vmlal_s16(Ch, Dh, c475);
const int32x4_t CDl = vmlal_s16(Cl, Dl, c475);
/* B = (298 * (Y - 16) + 516 * (U - 128)) >> 8*/
const int16x4_t c516 = { 516, 516, 516, 516 };
const int32x4_t CDh = vmlal_s16(Ch, Dh, c516);
const int32x4_t CDl = vmlal_s16(Cl, Dl, c516);
const int32x4_t Bh = vrshrq_n_s32(CDh, 8);
const int32x4_t Bl = vrshrq_n_s32(CDl, 8);
const int16x8_t B = vcombine_s16(vqmovn_s32(Bl), vqmovn_s32(Bh));
bgrx.val[bPos] = vqmovun_s16(B);
}
{
/* G = (256L * Y - 48 * (U - 128) - 120 * (V - 128)) >> 8 */
const int16x4_t c48 = vdup_n_s16(48);
const int16x4_t c120 = vdup_n_s16(120);
const int32x4_t CDh = vmlsl_s16(Ch, Dh, c48);
const int32x4_t CDl = vmlsl_s16(Cl, Dl, c48);
const int32x4_t CDEh = vmlsl_s16(CDh, Eh, c120);
const int32x4_t CDEl = vmlsl_s16(CDl, El, c120);
/* G = (298 * (Y - 16) - 100 * (U - 128) - 208 * (V - 128)) >> 8 */
const int16x4_t c100 = { 100, 100, 100, 100 };
const int16x4_t c208 = { 208, 208, 208, 208 };
const int32x4_t CDh = vmlsl_s16(Ch, Dh, c100);
const int32x4_t CDl = vmlsl_s16(Cl, Dl, c100);
const int32x4_t CDEh = vmlsl_s16(CDh, Eh, c208);
const int32x4_t CDEl = vmlsl_s16(CDl, El, c208);
const int32x4_t Gh = vrshrq_n_s32(CDEh, 8);
const int32x4_t Gl = vrshrq_n_s32(CDEl, 8);
const int16x8_t G = vcombine_s16(vqmovn_s32(Gl), vqmovn_s32(Gh));
bgrx.val[gPos] = vqmovun_s16(G);
}
{
/* R = (256 * Y + 403 * (V - 128)) >> 8 */
const int16x4_t c403 = vdup_n_s16(403);
const int32x4_t CEh = vmlal_s16(Ch, Eh, c403);
const int32x4_t CEl = vmlal_s16(Cl, El, c403);
/* R = (298 * (Y - 16) + 409 * (V - 128)) >> 8 */
const int16x4_t c409 = { 409, 409, 409, 409 };
const int32x4_t CEh = vmlal_s16(Ch, Eh, c409);
const int32x4_t CEl = vmlal_s16(Cl, El, c409);
const int32x4_t Rh = vrshrq_n_s32(CEh, 8);
const int32x4_t Rl = vrshrq_n_s32(CEl, 8);
const int16x8_t R = vcombine_s16(vqmovn_s32(Rl), vqmovn_s32(Rh));
Expand Down