Skip to content
Permalink
Browse files
MDEV-21362 do something with -fno-builtin-memcmp for rem0cmp.cc
This CMake check is no longer relevant: the oldest supported GCC is 4.8, and
the compiler issue was already fixed in GCC 4.6.
https://godbolt.org/z/7G64qo

cmp_data(): simplify code
  • Loading branch information
kevgs committed Dec 24, 2019
1 parent 714762d commit 3734439
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 65 deletions.
@@ -93,17 +93,6 @@ IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
## As of Mar 15 2011 this flag causes 3573+ warnings. If you are reading this
## please fix them and enable the following code:
#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion")

IF (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
  INCLUDE(CheckCXXCompilerFlag)
  # Probe whether the compiler understands -fno-builtin-memcmp before using it.
  CHECK_CXX_COMPILER_FLAG("-fno-builtin-memcmp" HAVE_NO_BUILTIN_MEMCMP)
  IF (HAVE_NO_BUILTIN_MEMCMP)
    # On x86/x86-64, force rem0cmp.cc to call the libc memcmp() instead of
    # the slow GCC built-in (repz cmpsb).
    # Work around http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
    SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/rem/rem0cmp.cc
      PROPERTIES COMPILE_FLAGS -fno-builtin-memcmp)
  ENDIF()
ENDIF()
ENDIF()

# Enable InnoDB's UNIV_DEBUG in debug builds
@@ -449,64 +449,19 @@ cmp_data(
data2, (unsigned) len2));
}

ulint len;
int cmp;
ulint len = std::min(len1, len2);

if (len1 < len2) {
len = len1;
len2 -= len;
len1 = 0;
} else {
len = len2;
len1 -= len;
len2 = 0;
}

if (len) {
#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64
/* Compare the first bytes with a loop to avoid the call
overhead of memcmp(). On x86 and x86-64, the GCC built-in
(repz cmpsb) seems to be very slow, so we will be calling the
libc version. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
tracks the slowness of the GCC built-in memcmp().
We compare up to the first 4..7 bytes with the loop.
The (len & 3) is used for "normalizing" or
"quantizing" the len parameter for the memcmp() call,
in case the whole prefix is equal. On x86 and x86-64,
the GNU libc memcmp() of equal strings is faster with
len=4 than with len=3.
On other architectures than the IA32 or AMD64, there could
be a built-in memcmp() that is faster than the loop.
We only use the loop where we know that it can improve
the performance. */
for (ulint i = 4 + (len & 3); i > 0; i--) {
cmp = int(*data1++) - int(*data2++);
if (cmp) {
return(cmp);
}
int cmp = memcmp(data1, data2, len);

if (!--len) {
break;
}
}

if (len) {
#endif /* IA32 or AMD64 */
cmp = memcmp(data1, data2, len);

if (cmp) {
return(cmp);
}

data1 += len;
data2 += len;
#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64
}
#endif /* IA32 or AMD64 */
if (cmp) {
return (cmp);
}

data1 += len;
data2 += len;
len1 -= len;
len2 -= len;

cmp = (int) (len1 - len2);

if (!cmp || pad == ULINT_UNDEFINED) {

0 comments on commit 3734439

Please sign in to comment.