Skip to content

Commit

Permalink
Refactor render_layer_code to C++, add optimizations (#211)
Browse files Browse the repository at this point in the history
* Initial C++ refactor of render_layer_code

* Remove ppal template specialization to partially fix performance

* Fix buggy render_layer_code clipping, enable align>bpp when clipping

* Add render_layer_func set_colors optimization for mode 0,PPW>1

* Add render_layer_code SMZX performance enhancements

* Cleanup, part 1

* Move compiler version checks out of Makefile

* Fix clang warnings

* Revert fcheck back to fbounds-check

* Fix PPW>1 renderers for big endian platforms

* Make render_layer_func print renderer for debug builds only
  • Loading branch information
AliceLR committed Mar 2, 2020
1 parent 2b8662e commit 52ddcc6
Show file tree
Hide file tree
Showing 6 changed files with 888 additions and 40 deletions.
72 changes: 45 additions & 27 deletions Makefile
Expand Up @@ -39,6 +39,8 @@ MKDIR ?= mkdir
MV ?= mv
RM ?= rm

include arch/compat.inc

#
# Set up CFLAGS/LDFLAGS for all MegaZeux external dependencies.
#
Expand Down Expand Up @@ -213,51 +215,50 @@ CFLAGS += -Wundef -Wunused-macros
CXXFLAGS += -Wundef -Wunused-macros
endif

#
# Enable C++11 for compilers that support it.
# Anything actually using C++11 should be optional or platform-specific,
# as features using C++11 reduce portability.
#
ifeq (${HAS_CXX_11},1)
CXX_STD = -std=gnu++11
else
CXX_STD = -std=gnu++98
endif

#
# Always generate debug information; this may end up being
# stripped (on embedded platforms) or objcopy'ed out.
#
CFLAGS += -g -W -Wall -Wno-unused-parameter -std=gnu99
CFLAGS += -Wdeclaration-after-statement ${ARCH_CFLAGS}
CXXFLAGS += -g -W -Wall -Wno-unused-parameter -std=gnu++98
CXXFLAGS += -g -W -Wall -Wno-unused-parameter ${CXX_STD}
CXXFLAGS += -fno-exceptions -fno-rtti ${ARCH_CXXFLAGS}
LDFLAGS += ${ARCH_LDFLAGS}

#
# GCC version >= 7.x
# Optional compile flags.
#

GCC_VER := ${shell ${CC} -dumpversion}
GCC_VER_MAJOR := ${shell ${CC} -dumpversion | cut -d. -f1}
GCC_VER_MAJOR_GE_7 := ${shell test $(GCC_VER_MAJOR) -ge 7; echo $$?}
IS_CLANG := ${shell ${CC} --version | grep -qi "clang version"; echo $$?}

ifeq ($(GCC_VER_MAJOR_GE_7),0)
# This gives spurious warnings on Linux. The snprintf implementation on Linux
# will terminate even in the case of truncation, making this largely useless.
# It does not trigger using mingw, where it would actually matter.
# Clang (as of 9.0) does not support this flag.
ifneq (${IS_CLANG},0)
#
# Linux GCC gives spurious format truncation warnings. The snprintf
# implementation on Linux will terminate even in the case of truncation,
# making this largely useless. It does not trigger using mingw (where it
# would actually matter).
#
ifeq (${HAS_W_NO_FORMAT_TRUNCATION},1)
CFLAGS += -Wno-format-truncation
endif
endif

#
# GCC version >= 4.x
# Enable bounds checks for debug builds.
#

GCC_VER_MAJOR_GE_4 := ${shell test $(GCC_VER_MAJOR) -ge 4; echo $$?}

ifeq ($(GCC_VER_MAJOR_GE_4),0)

ifeq (${DEBUG},1)
ifneq (${GCC_VER},4.2.1)
ifneq (${IS_CLANG},0)
ifeq (${HAS_F_BOUNDS_CHECK},1)
CFLAGS += -fbounds-check
CXXFLAGS += -fbounds-check
endif
endif
endif

#
# We enable pedantic warnings here, but this ends up turning on some things
Expand All @@ -267,31 +268,48 @@ endif
# support have them.
#
ifneq (${PLATFORM},android)
CFLAGS += -pedantic -Wno-variadic-macros
CXXFLAGS += -pedantic -fpermissive -Wno-variadic-macros
ifeq (${HAS_PEDANTIC},1)
CFLAGS += -pedantic
CXXFLAGS += -pedantic

ifeq (${HAS_F_PERMISSIVE},1)
CXXFLAGS += -fpermissive
endif

ifeq (${HAS_W_NO_VARIADIC_MACROS},1)
CFLAGS += -Wno-variadic-macros
CXXFLAGS += -Wno-variadic-macros
endif
endif
endif

#
# The following flags are not applicable to mingw builds.
#
ifneq (${PLATFORM},mingw)

#
# Symbols in COFF binaries are implicitly hidden unless exported; this
# flag just confuses GCC and must be disabled.
#
ifeq (${HAS_F_VISIBILITY},1)
CFLAGS += -fvisibility=hidden
CXXFLAGS += -fvisibility=hidden
endif

#
# Skip the stack protector on embedded platforms; it just unnecessarily
# slows things down, and there's no easy way to write a convincing
# __stack_chk_fail function. MinGW may or may not have a __stack_chk_fail
# function. Skip android, too.
# function.
#
ifeq (${HAS_F_STACK_PROTECTOR},1)
ifeq ($(or ${BUILD_GP2X},${BUILD_NDS},${BUILD_3DS},${BUILD_PSP},${BUILD_WII}),)
CFLAGS += -fstack-protector-all
CXXFLAGS += -fstack-protector-all
endif

endif

endif

#
Expand Down
71 changes: 71 additions & 0 deletions arch/compat.inc
@@ -0,0 +1,71 @@
#
# Makefile compiler compatibility checks.
# Defines HAS_* variables for optional compiler flags or settings to keep most
# of the compiler version check jank out of the main Makefile.
#
# Conventions used throughout this file:
#   - IS_* and *_GE_* / *_GT_* variables hold a shell exit status captured
#     with `echo $$?`, so 0 means "true" and any other value means "false".
#   - HAS_* variables are set to 1 when a feature is considered available
#     and are left undefined otherwise.
#

# Exit status of grepping the `${CC} --version` banner (0 = pattern found).
# NOTE(review): the two greps are independent, so a banner containing both
# "gcc" and "clang version" would set both to 0 -- confirm this cannot
# happen with the supported toolchains.
IS_GCC := ${shell ${CC} --version | grep -qi "gcc"; echo $$?}
IS_CLANG := ${shell ${CC} --version | grep -qi "clang version"; echo $$?}

# Version reported by the compiler, packed into a single integer as
# major*10000 + minor*100 + patch (e.g. 4.2.1 becomes 40201) so it can be
# compared numerically with `test`.
# Presumably both -dumpfullversion and -dumpversion are passed so that
# compilers which print only the major version for -dumpversion still yield
# a full version string -- TODO confirm against the supported compilers.
GCC_VER := ${shell ${CC} -dumpfullversion -dumpversion |\
awk -F. '{print $$3+100*($$2+100*$$1)}'}

# Exit status of each version comparison (0 = comparison holds).
GCC_VER_GE_4 := ${shell test ${GCC_VER} -ge 40000; echo $$?}
GCC_VER_GT_4_2_1 := ${shell test ${GCC_VER} -gt 40201; echo $$?}
GCC_VER_GE_4_8 := ${shell test ${GCC_VER} -ge 40800; echo $$?}
GCC_VER_GE_7 := ${shell test ${GCC_VER} -ge 70000; echo $$?}

# The clang version is only computed when the banner matched "clang version".
# It takes the first x.y.z number following "clang version" in the
# `${CC} --version` output and packs it the same way as GCC_VER.
# NOTE(review): the pattern below is anchored with "^clang version", while
# IS_CLANG matches "clang version" anywhere in the banner. A banner with a
# vendor prefix before "clang version" would set IS_CLANG to 0 but leave
# CLANG_VER empty, making the `test` below fail (nonzero), so HAS_CXX_11
# would not be set for that compiler -- confirm whether this is intended.
ifeq (${IS_CLANG},0)
CLANG_VER := ${shell ${CC} --version |\
grep -oi "^clang version.* [0-9]\+\.[0-9]\+\.[0-9]\+" |\
grep -o "[0-9]\+.*" |\
awk -F. '{print $$3+100*($$2+100*$$1)}'}

CLANG_VER_GE_3_3 := ${shell test "${CLANG_VER}" -ge 30300; echo $$?}
endif
#
# Features requiring GCC 4 minimum.
# NOTE: most of these haven't been verified; this is how they were originally
# version checked in the Makefile.
# This check is based on GCC_VER only; it is not restricted by IS_GCC or
# IS_CLANG.
#
ifeq (${GCC_VER_GE_4},0)
HAS_PEDANTIC = 1
HAS_F_PERMISSIVE = 1
HAS_F_STACK_PROTECTOR = 1
HAS_F_VISIBILITY = 1
HAS_W_NO_VARIADIC_MACROS = 1
endif
#
# Features requiring higher versions of GCC.
# These are never enabled when the compiler was detected as clang.
#
ifneq (${IS_CLANG},0)
# It is not certain that "greater than 4.2.1" is the correct version bound;
# the != 4.2.1 check that previously existed in the Makefile may have been
# intended to block clang rather than a specific GCC release.
ifeq (${GCC_VER_GT_4_2_1},0)
HAS_F_BOUNDS_CHECK = 1
endif
# Requires a reported version of 7.0.0 or higher.
ifeq ($(GCC_VER_GE_7),0)
HAS_W_NO_FORMAT_TRUNCATION = 1
endif
endif
#
# C++11 support.
# C++11 is optional or unused for all core MegaZeux features, but may be
# required in the future for optional features (e.g. the network layer).
# HAS_CXX_11 is set for GCC >= 4.8 and for clang >= 3.3.
#
ifeq ($(IS_GCC),0)
ifeq ($(GCC_VER_GE_4_8),0)
HAS_CXX_11 = 1
endif
endif
ifeq ($(IS_CLANG),0)
ifeq ($(CLANG_VER_GE_3_3),0)
HAS_CXX_11 = 1
endif
endif
10 changes: 9 additions & 1 deletion docs/changelog.txt
Expand Up @@ -45,15 +45,23 @@ USERS
+ The Emscripten frontend now falls back to a wrapper for MZX's
internal ZIP support when UZIP fails to extract an archive.
+ Fixed IndexedDB support for legacy Edge versions. (asie)
+ Fixed an 8bpp software layer renderer bug where transparent UI
colors would not be transparent in SMZX mode.
+ General software layer renderer (software, softscale, opengl1)
performance improvements.
- Removed the options "overlay1" and "overlay2" from the list of
selectable renderer options except when SDL 1.2 is enabled.

DEVELOPERS

+ render_layer.c now disables the (unused) 64-aligned renderers
+ Refactored render_layer_code to use C++ templates.
+ render_layer now disables the (unused) 64-aligned renderers
for 32-bit platforms and Emscripten builds.
+ Fixed buggy render_layer clipping that required clipping
renderers to unconditionally use a suboptimal alignment.
+ Switched from xmp_load_module to xmp_load_module_from_file.
+ Fixed various clang unused argument compiler warnings.
+ Moved Makefile compiler version checks to arch/compat.inc.


September 23rd, 2019 - MZX 2.92b
Expand Down
File renamed without changes.
21 changes: 9 additions & 12 deletions src/render_layer.c → src/render_layer.cpp
Expand Up @@ -36,7 +36,7 @@
#define SKIP_64_ALIGN
#endif

#include "render_layer_code.h"
#include "render_layer_code.hpp"

#if 0
// This layer renderer is very slow, but it should work properly.
Expand Down Expand Up @@ -208,6 +208,12 @@ void render_layer(void *pixels, int force_bpp, Uint32 pitch,
drawStart =
(size_t)((char *)pixels + layer->y * pitch + (layer->x * force_bpp / 8));

/**
* Select the highest pixel align the current platform is capable of.
* Additionally, to simplify the renderer code, the align must also be
* capable of addressing the first pixel of the draw (e.g. a 64-bit platform
* will use a 32-bit align if the layer is on an odd horizontal pixel).
*/
#ifndef SKIP_64_ALIGN
if((sizeof(size_t) >= sizeof(Uint64)) && ((drawStart % sizeof(Uint64)) == 0))
{
Expand All @@ -227,15 +233,6 @@ void render_layer(void *pixels, int force_bpp, Uint32 pitch,
align = 16;
}

if(clip)
align = force_bpp;

#if PLATFORM_BYTE_ORDER == PLATFORM_BIG_ENDIAN
// Currently not sure how big endian will work,
// so for now force alignment to bpp
align = force_bpp;
#endif

render_func_Xbpp_Xtrans_Xalign_Xsmzx_Xclip_Xppal(pixels, pitch, graphics,
layer, ppal, clip, smzx, align, trans, force_bpp);
render_layer_func(pixels, pitch, graphics, layer,
force_bpp, align, smzx, ppal, trans, clip);
}

0 comments on commit 52ddcc6

Please sign in to comment.