diff --git a/build/win32/Cxbx.vcxproj b/build/win32/Cxbx.vcxproj
index e7b9e89bd7..f2d5be5664 100644
--- a/build/win32/Cxbx.vcxproj
+++ b/build/win32/Cxbx.vcxproj
@@ -283,6 +283,7 @@ $(SOLUTIONDIR)Export.bat
+
@@ -543,6 +544,7 @@ $(SOLUTIONDIR)Export.bat
%(PreprocessorDefinitions)
+
%(AdditionalIncludeDirectories)
diff --git a/build/win32/Cxbx.vcxproj.filters b/build/win32/Cxbx.vcxproj.filters
index 92ba99d119..4f0b0415ea 100644
--- a/build/win32/Cxbx.vcxproj.filters
+++ b/build/win32/Cxbx.vcxproj.filters
@@ -199,6 +199,9 @@
GUI
+
+ Emulator
+
@@ -549,6 +552,9 @@
GUI
+
+ Emulator
+
diff --git a/src/CxbxKrnl/EmuD3D8.cpp b/src/CxbxKrnl/EmuD3D8.cpp
index 9bdc4de3b6..7395f94c52 100644
--- a/src/CxbxKrnl/EmuD3D8.cpp
+++ b/src/CxbxKrnl/EmuD3D8.cpp
@@ -53,6 +53,7 @@ namespace xboxkrnl
#include "EmuAlloc.h"
#include "MemoryManager.h"
#include "EmuXTL.h"
+#include "libyuv_extract.h" // for YUY2ToARGB
#include
#include
@@ -6060,13 +6061,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_EnableOverlay)
return;
}
-// Based on http://codereview.stackexchange.com/questions/6502/fastest-way-to-clamp-an-integer-to-the-range-0-255
-inline uint08 ClampIntToByte(int x)
-{
- int r = x > 255 ? 255 : x;
- return r < 0 ? 0 : (uint08)r;
-}
-
// ******************************************************************
// * patch: D3DDevice_UpdateOverlay
// ******************************************************************
@@ -6181,8 +6175,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_UpdateOverlay)
{
uint08 *pbSource = (uint08*)pSurface->Lock;
uint08 *pbDest = (uint08*)LockedRectDest.pBits;
- uint32 dx = 0, dy = 0;
- uint32 dwImageSize = g_dwOverlayP*g_dwOverlayH;
// Get backbuffer dimenions; TODO : remember this once, at creation/resize time
D3DSURFACE_DESC BackBufferDesc;
@@ -6194,89 +6186,8 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_UpdateOverlay)
uint32 W = min(g_dwOverlayW, BackBufferDesc.Width);
uint32 H = min(g_dwOverlayH, BackBufferDesc.Height);
- // grayscale - TODO : Either remove or make configurable
- if(false)
- {
- // Clip to backbuffer height :
- for(uint32 y=0;yXRGB)
- else
- {
- // The following is a combination of https://pastebin.com/mDcwqJV3 and
- // https://en.wikipedia.org/wiki/YUV#Y.E2.80.B2UV422_to_RGB888_conversion
- // TODO : Improve this to use a library, or SIMD instructions like in
- // https://github.com/descampsa/yuv2rgb/blob/master/yuv_rgb.c
- // https://github.com/lemenkov/libyuv/blob/master/source/row_win.cc#L100
- const int K1 = int(1.402f * (1 << 16));
- const int K2 = int(0.334f * (1 << 16));
- const int K3 = int(0.714f * (1 << 16));
- const int K4 = int(1.772f * (1 << 16));
-
- for(uint32 v=0;v> 16;
- int Gd = ((K2 * nCb) + (K3 * nCr)) >> 16;
- int Bd = (K4 * nCb) >> 16;
-
- uint32 i = (dy * LockedRectDest.Pitch) + (dx * 4);
-
- pbDest[i + 0] = ClampIntToByte((int)Y0 + Bd);
- pbDest[i + 1] = ClampIntToByte((int)Y0 - Gd);
- pbDest[i + 2] = ClampIntToByte((int)Y0 + Rd);
- pbDest[i + 3] = 0xFF;
-
- pbDest[i + 4] = ClampIntToByte((int)Y1 + Bd);
- pbDest[i + 5] = ClampIntToByte((int)Y1 - Gd);
- pbDest[i + 6] = ClampIntToByte((int)Y1 + Rd);
- pbDest[i + 7] = 0xFF;
- }
-
- pbSource += 4;
- dx += 2;
- if ((dx % g_dwOverlayW) == 0)
- {
- dy++;
- // Clip to backbuffer height :
- if (dy >= H)
- break;
-
- dx = 0;
- }
-
- }
- }
+ YUY2ToARGB(pbSource, g_dwOverlayP, pbDest, BackBufferDesc.Width * 4, W, H);
pBackBuffer->UnlockRect();
pBackBuffer->Release();
diff --git a/src/CxbxKrnl/EmuD3D8/State.cpp b/src/CxbxKrnl/EmuD3D8/State.cpp
index 45ae857839..9c86c80392 100644
--- a/src/CxbxKrnl/EmuD3D8/State.cpp
+++ b/src/CxbxKrnl/EmuD3D8/State.cpp
@@ -44,7 +44,6 @@ DWORD *XTL::EmuD3DDeferredRenderState;
DWORD *XTL::EmuD3DDeferredTextureState;
extern uint32 g_BuildVersion;
-extern uint32 g_OrigBuildVersion;
// ******************************************************************
// * patch: UpdateDeferredStates
diff --git a/src/CxbxKrnl/HLEIntercept.cpp b/src/CxbxKrnl/HLEIntercept.cpp
index 95269d6b5c..a82025d974 100644
--- a/src/CxbxKrnl/HLEIntercept.cpp
+++ b/src/CxbxKrnl/HLEIntercept.cpp
@@ -55,7 +55,6 @@ uint32 fcount = 0;
void * funcExclude[2048] = { nullptr };
uint32 g_BuildVersion;
-uint32 g_OrigBuildVersion;
static std::vector vCacheOut;
@@ -331,7 +330,6 @@ void EmuHLEIntercept(Xbe::Header *pXbeHeader)
{
// Save D3D8 build version
g_BuildVersion = BuildVersion;
- g_OrigBuildVersion = OrigBuildVersion;
xbaddr lower = pXbeHeader->dwBaseAddr;
xbaddr upper = pXbeHeader->dwBaseAddr + pXbeHeader->dwSizeofImage;
@@ -506,7 +504,6 @@ void EmuHLEIntercept(Xbe::Header *pXbeHeader)
// {
// // Save D3D8 build version
// g_BuildVersion = BuildVersion;
- // g_OrigBuildVersion = OrigBuildVersion;
// xbaddr lower = pXbeHeader->dwBaseAddr;
// xbaddr upper = pXbeHeader->dwBaseAddr + pXbeHeader->dwSizeofImage;
diff --git a/src/CxbxKrnl/libyuv_extract.cpp b/src/CxbxKrnl/libyuv_extract.cpp
new file mode 100644
index 0000000000..aa46e04a51
--- /dev/null
+++ b/src/CxbxKrnl/libyuv_extract.cpp
@@ -0,0 +1,226 @@
+// This is an open source non-commercial project. Dear PVS-Studio, please check it.
+// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
+
+#include "libyuv_extract.h"
+
+// The following code is an extract of libyuv to keep code size low (this may be revisited later).
+// Source : https://github.com/lemenkov/libyuv/commit/7e936044d154b9fe159a67f9562e10b1ef1cb590
+
+/* From libyuv\README.chromium :
+Name: libyuv
+URL: http://code.google.com/p/libyuv/
+Version: 1514
+License: BSD
+License File: LICENSE
+
+Description:
+libyuv is an open source project that includes YUV conversion and scaling functionality.
+*/
+
+// From libyuv\include\libyuv\row.h :
+
+// This struct is for Intel color conversion.
+struct YuvConstants {
+ int8 kUVToB[32];
+ int8 kUVToG[32];
+ int8 kUVToR[32];
+ int16 kUVBiasB[16];
+ int16 kUVBiasG[16];
+ int16 kUVBiasR[16];
+ int16 kYToRgb[16];
+};
+
+// From libyuv\include\libyuv\row.h :
+
+#if defined(VISUALC_HAS_AVX2)
+#define SIMD_ALIGNED(var) __declspec(align(32)) var
+#else
+#define SIMD_ALIGNED(var) __declspec(align(16)) var
+#endif
+
+// From libyuv\source\row_common.cc :
+
+// llvm x86 is poor at ternary operator, so use branchless min/max.
+#define USE_BRANCHLESS 1
+#if USE_BRANCHLESS
+static __inline int32 clamp0(int32 v) {
+ return ((-(v) >> 31) & (v));
+}
+
+static __inline int32 clamp255(int32 v) {
+ return (((255 - (v)) >> 31) | (v)) & 255;
+}
+
+static __inline uint32 Clamp(int32 val) {
+ int v = clamp0(val);
+ return (uint32)(clamp255(v));
+}
+
+static __inline uint32 Abs(int32 v) {
+ int m = v >> 31;
+ return (v + m) ^ m;
+}
+#else // USE_BRANCHLESS
+static __inline int32 clamp0(int32 v) {
+ return (v < 0) ? 0 : v;
+}
+
+static __inline int32 clamp255(int32 v) {
+ return (v > 255) ? 255 : v;
+}
+
+static __inline uint32 Clamp(int32 val) {
+ int v = clamp0(val);
+ return (uint32)(clamp255(v));
+}
+
+static __inline uint32 Abs(int32 v) {
+ return (v < 0) ? -v : v;
+}
+#endif // USE_BRANCHLESS
+
+// From libyuv\source\row_common.cc :
+
+// BT.601 YUV to RGB reference
+// R = (Y - 16) * 1.164 - V * -1.596
+// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
+// B = (Y - 16) * 1.164 - U * -2.018
+
+// Y contribution to R,G,B. Scale and bias.
+#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
+#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
+
+// U and V contributions to R,G,B.
+#define UB -128 /* max(-128, round(-2.018 * 64)) */
+#define UG 25 /* round(0.391 * 64) */
+#define VG 52 /* round(0.813 * 64) */
+#define VR -102 /* round(-1.596 * 64) */
+
+// Bias values to subtract 16 from Y and 128 from U and V.
+#define BB (UB * 128 + YGB)
+#define BG (UG * 128 + VG * 128 + YGB)
+#define BR (VR * 128 + YGB)
+
+const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
+ { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
+ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
+ { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
+ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
+ { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
+ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
+ { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
+ { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
+ { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
+ { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } };
+
+// C reference code that mimics the YUV assembly.
+static __inline void YuvPixel(uint8 y,
+ uint8 u,
+ uint8 v,
+ uint8* b,
+ uint8* g,
+ uint8* r,
+ const struct YuvConstants* yuvconstants) {
+ int ub = yuvconstants->kUVToB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = yuvconstants->kUVToR[1];
+ int bb = yuvconstants->kUVBiasB[0];
+ int bg = yuvconstants->kUVBiasG[0];
+ int br = yuvconstants->kUVBiasR[0];
+ int yg = yuvconstants->kYToRgb[0];
+
+ uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16;
+ *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6);
+ *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6);
+ *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
+}
+
+void YUY2ToARGBRow_C(const uint8* src_yuy2,
+ uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = 255;
+ src_yuy2 += 4;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ }
+}
+
+// Convert YUY2 to ARGB.
+LIBYUV_API
+int YUY2ToARGB(const uint8* src_yuy2,
+ int src_stride_yuy2,
+ uint8* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void(*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb,
+ const struct YuvConstants* yuvconstants, int width) =
+ YUY2ToARGBRow_C;
+ if (!src_yuy2 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+ src_stride_yuy2 = -src_stride_yuy2;
+ }
+ // Coalesce rows.
+ if (src_stride_yuy2 == width * 2 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_yuy2 = dst_stride_argb = 0;
+ }
+#if defined(HAS_YUY2TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_MSA;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvI601Constants, width);
+ src_yuy2 += src_stride_yuy2;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
diff --git a/src/CxbxKrnl/libyuv_extract.h b/src/CxbxKrnl/libyuv_extract.h
new file mode 100644
index 0000000000..bafae1b69b
--- /dev/null
+++ b/src/CxbxKrnl/libyuv_extract.h
@@ -0,0 +1,18 @@
+// This is an open source non-commercial project. Dear PVS-Studio, please check it.
+// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
+
+#include "Cxbx.h"
+
+typedef int08 int8;
+typedef uint08 uint8;
+
+// From libyuv\include\libyuv\basic_types.h :
+#define LIBYUV_API
+
+LIBYUV_API
+int YUY2ToARGB(const uint8* src_yuy2,
+ int src_stride_yuy2,
+ uint8* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
\ No newline at end of file