Skip to content

Commit

Permalink
MDEV-33447: libpmem is not available in RHEL 8
Browse files Browse the repository at this point in the history
Because the Red Hat Enterprise Linux 8 core repository does not include
libpmem, let us implement the necessary subset ourselves.

pmem_persist(): Implement for 64-bit x86, ARM, POWER, RISC-V, Loongarch
in a way that should be compatible with the https://github.com/pmem/pmdk/
implementation of pmem_persist().

The CMake option WITH_INNODB_PMEM can be used for enabling or disabling
this interface at compile time. By default, it is enabled on all applicable
systems that are covered by our CI system.

Note: libpmem had not been previously enabled for Loongarch in our
Debian packaging. It was enabled for RISC-V, but we will not enable it
by default on RISC-V or Loongarch because we lack CI coverage.

The generated code for x86_64 was reviewed and tested on two
Intel implementations: one that only supports clflush, and
another that supports both clflushopt and clwb.

The generated machine code was also reviewed on https://godbolt.org
using various compiler versions. Godbolt helpfully includes an option
to compile to binary code and display the encoding, which was
useful on POWER.

Reviewed by: Vladislav Vaintroub
  • Loading branch information
dr-m committed Apr 19, 2024
1 parent 8a3755c commit 3f9f5ca
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 62 deletions.
18 changes: 0 additions & 18 deletions cmake/FindPMEM.cmake

This file was deleted.

18 changes: 0 additions & 18 deletions debian/autobake-deb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,6 @@ replace_uring_with_aio()
-e '/-DWITH_URING=ON/d' -i debian/rules
}

disable_pmem()
{
sed '/libpmem-dev/d' -i debian/control
sed '/-DWITH_PMEM=ON/d' -i debian/rules
}

disable_libfmt()
{
# 7.0+ required
Expand Down Expand Up @@ -116,21 +110,13 @@ in
"buster")
disable_libfmt
replace_uring_with_aio
if [ ! "$architecture" = amd64 ]
then
disable_pmem
fi
;&
"bullseye")
add_lsb_base_depends
;&
"bookworm")
# mariadb-plugin-rocksdb in control is 4 arches covered by the distro rocksdb-tools
# so no removal is necessary.
if [[ ! "$architecture" =~ amd64|arm64|ppc64el ]]
then
disable_pmem
fi
if [[ ! "$architecture" =~ amd64|arm64|armel|armhf|i386|mips64el|mipsel|ppc64el|s390x ]]
then
replace_uring_with_aio
Expand All @@ -149,10 +135,6 @@ in
add_lsb_base_depends
;&
"lunar"|"mantic")
if [[ ! "$architecture" =~ amd64|arm64|ppc64el ]]
then
disable_pmem
fi
if [[ ! "$architecture" =~ amd64|arm64|armhf|ppc64el|s390x ]]
then
replace_uring_with_aio
Expand Down
1 change: 0 additions & 1 deletion debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ Build-Depends: bison,
libnuma-dev [linux-any],
libpam0g-dev,
libpcre2-dev,
libpmem-dev [amd64 arm64 ppc64el riscv64],
libsnappy-dev,
libssl-dev,
libssl-dev:native,
Expand Down
6 changes: 0 additions & 6 deletions debian/rules
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,6 @@ ifeq (32,$(DEB_HOST_ARCH_BITS))
CMAKEFLAGS += -DPLUGIN_ROCKSDB=NO
endif

# Only attempt to build with PMEM on archs that have package libpmem-dev available
# See https://packages.debian.org/search?searchon=names&keywords=libpmem-dev
ifneq (,$(filter $(DEB_HOST_ARCH),amd64 arm64 ppc64el riscv64))
CMAKEFLAGS += -DWITH_PMEM=ON
endif

# Add support for verbose builds
MAKEFLAGS += VERBOSE=1

Expand Down
4 changes: 0 additions & 4 deletions extra/mariabackup/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ ADD_DEFINITIONS(-UMYSQL_SERVER)
ADD_DEFINITIONS(-DPCRE_STATIC=1)
ADD_DEFINITIONS(${SSL_DEFINES})

IF(PMEM_FOUND)
ADD_COMPILE_FLAGS(xtrabackup.cc COMPILE_FLAGS "-DHAVE_PMEM")
ENDIF()

MYSQL_ADD_EXECUTABLE(mariadb-backup
xtrabackup.cc
innobackupex.cc
Expand Down
23 changes: 10 additions & 13 deletions storage/innobase/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ IF(UNIX)
IF(HAVE_LIBNUMA)
LINK_LIBRARIES(numa)
ENDIF()
# The memory-mapped redo log interface is only offered on 64-bit targets.
IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
  IF(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch|AARCH|p(ower)?pc|x86_|amd)64")
    # An ISA that is not covered by our CI: the option exists,
    # but it has to be enabled explicitly.
    OPTION(WITH_INNODB_PMEM "Support memory-mapped InnoDB redo log" OFF)
  ELSE()
    # AMD64, ARMv8 or 64-bit POWER: enabled unless the user disables it.
    OPTION(WITH_INNODB_PMEM "Support memory-mapped InnoDB redo log" ON)
  ENDIF()
ENDIF()
ENDIF()
ENDIF()

Expand Down Expand Up @@ -428,26 +435,16 @@ SET(INNOBASE_SOURCES
ut/ut0vec.cc
ut/ut0wqueue.cc)

OPTION(WITH_PMEM "Support redo log in persistent memory" OFF)
FIND_PACKAGE(PMEM)
IF(PMEM_FOUND)
INCLUDE_DIRECTORIES(${PMEM_INCLUDES})
ADD_COMPILE_FLAGS(log/log0log.cc log/log0recv.cc
buf/buf0flu.cc mtr/mtr0mtr.cc trx/trx0trx.cc srv/srv0start.cc
COMPILE_FLAGS "-DHAVE_PMEM")
SET(PMEM_LIBRARY ${PMEM_LIBRARIES})
ELSE()
IF(WITH_PMEM)
MESSAGE(FATAL_ERROR "WITH_PMEM=ON cannot be satisfied")
ENDIF()
IF(WITH_INNODB_PMEM)
ADD_DEFINITIONS(-DHAVE_PMEM)
SET(INNOBASE_SOURCES ${INNOBASE_SOURCES} include/cache.h sync/cache.cc)
ENDIF()

MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
MODULE_OUTPUT_NAME ha_innodb
DEFAULT RECOMPILE_FOR_EMBEDDED
LINK_LIBRARIES
${ZLIB_LIBRARY}
${PMEM_LIBRARY}
${NUMA_LIBRARY}
${LIBSYSTEMD}
${LINKER_SCRIPT})
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/buf/buf0flu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1733,7 +1733,7 @@ static ulint buf_flush_LRU(ulint max_n)
}

#ifdef HAVE_PMEM
# include <libpmem.h>
# include "cache.h"
#endif

/** Write checkpoint information to the log header and release mutex.
Expand Down
33 changes: 33 additions & 0 deletions storage/innobase/include/cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*****************************************************************************
Copyright (c) 2024, MariaDB plc
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/

#pragma once
#include <cstddef>

#if defined __x86_64__ || defined __aarch64__
/** Holder of the cache flushing routine for these two ISAs.
The routine is stored as a function pointer that is assigned by
the constructor. */
struct pmem_control
{
/** the routine that the pmem_persist() macro invokes
(parameters: start address, length in bytes) */
void (*persist)(const void *, size_t);
public:
/** Constructor; assigns persist. Defined in sync/cache.cc. */
pmem_control();
};
/** The instance that is defined in sync/cache.cc */
extern const pmem_control pmem;
/** Invoke the selected flushing routine on a memory range.
@param buf   start address of the range
@param size  length of the range, in bytes */
# define pmem_persist(buf, size) pmem.persist(buf, size)
#else
/** Persist a memory range; on other ISAs this is an ordinary function
that is defined in sync/cache.cc.
@param buf   start address of the range
@param size  length of the range, in bytes */
void pmem_persist(const void *buf, size_t size);
#endif
2 changes: 1 addition & 1 deletion storage/innobase/log/log0log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ void log_file_t::write(os_offset_t offset, span<const byte> buf) noexcept
}

#ifdef HAVE_PMEM
# include <libpmem.h>
# include "cache.h"

/** Attempt to memory map a file.
@param file log file handle
Expand Down
137 changes: 137 additions & 0 deletions storage/innobase/sync/cache.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*****************************************************************************
Copyright (c) 2024, MariaDB plc
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/

/* This is based on the implementation of pmem_persist() in
https://github.com/pmem/pmdk/, Copyright 2014-2020, Intel Corporation,
last revised in libpmem-1.12.0. */

#include "my_global.h"
#include "cache.h"
#include <cstdint>

#if defined __x86_64__ || defined __aarch64__
# ifdef __x86_64__
/** Flush every cache line that overlaps a memory range, using CLFLUSH.
Unlike the CLFLUSHOPT and CLWB variants, no SFENCE is issued afterwards.
@param buf   start address of the range
@param size  length of the range, in bytes */
static void pmem_clflush(const void *buf, size_t size)
{
  /* Round the start address down to a cache line boundary. The mask has
  to clear all low-order bits, that is, ~(line - 1). Masking with ~line
  would clear just one bit and leave the start unaligned, so that the
  loop could step past the last cache line of the range without flushing
  it (for example, buf=0x3f, size=2 with 64-byte lines).
  This assumes that CPU_LEVEL1_DCACHE_LINESIZE is a power of 2. */
  const uintptr_t line= uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE);
  for (uintptr_t u= uintptr_t(buf) & ~(line - 1),
         end= uintptr_t(buf) + size;
       u < end; u+= line)
    __asm__ __volatile__("clflush %0" ::
                         "m"(*reinterpret_cast<const char*>(u)) : "memory");
}

/** Flush every cache line that overlaps a memory range, using CLFLUSHOPT,
and then issue SFENCE.
@param buf   start address of the range
@param size  length of the range, in bytes */
static void pmem_clflushopt(const void *buf, size_t size)
{
  /* Round the start address down to a cache line boundary. The mask has
  to clear all low-order bits, that is, ~(line - 1); ~line would clear
  just one bit, and the loop could then miss the last cache line of an
  unaligned range.
  This assumes that CPU_LEVEL1_DCACHE_LINESIZE is a power of 2. */
  const uintptr_t line= uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE);
  for (uintptr_t u= uintptr_t(buf) & ~(line - 1),
         end= uintptr_t(buf) + size;
       u < end; u+= line)
    /* The 0x66 prefix turns the encoding of clflush into clflushopt,
    so that an assembler that lacks the mnemonic can still be used. */
    __asm__ __volatile__(".byte 0x66; clflush %0" /* clflushopt */ ::
                         "m"(*reinterpret_cast<const char*>(u)) : "memory");
  __asm__ __volatile__("sfence" ::: "memory");
}

/** Write back every cache line that overlaps a memory range, using CLWB,
and then issue SFENCE.
@param buf   start address of the range
@param size  length of the range, in bytes */
static void pmem_clwb(const void *buf, size_t size)
{
  /* Round the start address down to a cache line boundary. The mask has
  to clear all low-order bits, that is, ~(line - 1); ~line would clear
  just one bit, and the loop could then miss the last cache line of an
  unaligned range.
  This assumes that CPU_LEVEL1_DCACHE_LINESIZE is a power of 2. */
  const uintptr_t line= uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE);
  for (uintptr_t u= uintptr_t(buf) & ~(line - 1),
         end= uintptr_t(buf) + size;
       u < end; u+= line)
    /* The 0x66 prefix turns the encoding of xsaveopt into clwb,
    so that an assembler that lacks the mnemonic can still be used. */
    __asm__ __volatile__(".byte 0x66; xsaveopt %0" /* clwb */ ::
                         "m"(*reinterpret_cast<const char*>(u)) : "memory");
  __asm__ __volatile__("sfence" ::: "memory");
}

# include <cpuid.h>
/** Choose the cache flushing routine for this x86-64 processor.
Preference order: CLWB, then CLFLUSHOPT, then CLFLUSH.
@return the function to be stored in pmem_control::persist */
static decltype(pmem_control::persist) pmem_persist_init()
{
  /* CPUID leaf 7 must only be interpreted if the processor reports it
  as supported; for an unsupported leaf the register contents are not
  feature flags. In that case, fall back to CLFLUSH. */
  if (__get_cpuid_max(0, nullptr) >= 7)
  {
    uint32_t eax= 0, ebx= 0, ecx= 0, edx= 0;
    __cpuid_count(7, 0, eax, ebx, ecx, edx);
    if (ebx & 1U<<24 /* CLWB */)
      return pmem_clwb;
    if (ebx & 1U<<23 /* CLFLUSHOPT */)
      return pmem_clflushopt;
  }
  return pmem_clflush;
}
# elif defined __aarch64__
/** Clean every cache line that overlaps a memory range, using DC CVAC,
and then issue DMB ISHST.
@param buf   start address of the range
@param size  length of the range, in bytes */
static void pmem_cvac(const void* buf, size_t size)
{
  /* Round the start address down to a cache line boundary. The mask has
  to clear all low-order bits, that is, ~(line - 1); ~line would clear
  just one bit, and the loop could then miss the last cache line of an
  unaligned range.
  This assumes that CPU_LEVEL1_DCACHE_LINESIZE is a power of 2. */
  const uintptr_t line= uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE);
  for (uintptr_t u= uintptr_t(buf) & ~(line - 1),
         end= uintptr_t(buf) + size;
       u < end; u+= line)
    __asm__ __volatile__("dc cvac, %0" :: "r"(u) : "memory");
  __asm__ __volatile__("dmb ishst" ::: "memory");
}

/** Clean every cache line that overlaps a memory range, using DC CVAP,
and then issue DMB ISHST.
@param buf   start address of the range
@param size  length of the range, in bytes */
static void pmem_cvap(const void* buf, size_t size)
{
  /* Round the start address down to a cache line boundary. The mask has
  to clear all low-order bits, that is, ~(line - 1); ~line would clear
  just one bit, and the loop could then miss the last cache line of an
  unaligned range.
  This assumes that CPU_LEVEL1_DCACHE_LINESIZE is a power of 2. */
  const uintptr_t line= uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE);
  for (uintptr_t u= uintptr_t(buf) & ~(line - 1),
         end= uintptr_t(buf) + size;
       u < end; u+= line)
    /* The .arch directive makes the assembler accept the mnemonic */
    __asm__ __volatile__(".arch armv8.2-a\n dc cvap, %0" :: "r"(u) : "memory");
  __asm__ __volatile__("dmb ishst" ::: "memory");
}

# include <sys/auxv.h>
# include <asm/hwcap.h>
# ifndef HWCAP_DCPOP
# define HWCAP_DCPOP (1 << 16)
# endif

/** Choose the cache cleaning routine for this AArch64 processor.
@return pmem_cvap if AT_HWCAP reports HWCAP_DCPOP, else pmem_cvac */
static decltype(pmem_control::persist) pmem_persist_init()
{
  if (getauxval(AT_HWCAP) & HWCAP_DCPOP)
    return pmem_cvap;
  return pmem_cvac;
}
# endif

/** Constructor: store the routine that pmem_persist_init() selected */
pmem_control::pmem_control() : persist(pmem_persist_init()) {}
/** The instance whose persist member the pmem_persist() macro of cache.h
invokes */
const pmem_control pmem;
#else
/** Persist a memory range on ISAs that do not select an implementation
at runtime.
On 64-bit POWER, every cache line that overlaps the range is written
back with dcbstps, followed by phwsync. On 64-bit RISC-V and LoongArch,
only a fence instruction is issued.
@param buf   start address of the range
@param size  length of the range, in bytes */
void pmem_persist(const void *buf, size_t size)
{
  /* GCC on Linux is expected to define __powerpc64__ and __PPC64__;
  __ppc64__ alone may not be defined there, so accept all spellings. */
# if defined __ppc64__ || defined __powerpc64__ || defined __PPC64__
  /* Round the start address down to a cache line boundary. The mask has
  to clear all low-order bits, that is, ~(line - 1); ~line would clear
  just one bit, and the loop could then miss the last cache line of an
  unaligned range.
  This assumes that CPU_LEVEL1_DCACHE_LINESIZE is a power of 2. */
  const uintptr_t line= uintptr_t(CPU_LEVEL1_DCACHE_LINESIZE);
  for (uintptr_t u= uintptr_t(buf) & ~(line - 1),
         end= uintptr_t(buf) + size;
       u < end; u+= line)
  {
    /* GCC is just passing the inline asm snippets to the assembler,
    and it does not even define these mnemonics by itself. Clang does,
    and it includes a built-in assembler.
    Let us hope that having a recent enough GCC is an adequate proxy
    for having a recent enough assembler. */
#  if __GNUC__ >= 11 || (defined __clang_major__ && __clang_major__ >= 12)
    __asm__ __volatile__("dcbstps 0,%0" :: "r"(u) : "memory");
#  else
    /* dcbstps 0,RB is dcbf (0x7c0000ac) with L=6; RB is at bit 11 */
    __asm__ __volatile__(".long (0x7cc000AC | %0 << 11)" :: "r"(u) : "memory");
#  endif
  }

#  if __GNUC__ >= 11 || (defined __clang_major__ && __clang_major__ >= 18)
  __asm__ __volatile__("phwsync" ::: "memory");
#  else
  /* phwsync is sync (0x7c0004ac) with L=4, that is, 4 << 21 */
  __asm__ __volatile__(".long 0x7c8004ac" ::: "memory");
#  endif
# elif defined __riscv && __riscv_xlen == 64
  __asm__ __volatile__("fence w,w" ::: "memory");
# elif defined __loongarch64
  __asm__ __volatile__("dbar 0" ::: "memory");
# else
#  error "Missing implementation; recompile with cmake -DWITH_INNODB_PMEM=OFF"
# endif
}
#endif

2 comments on commit 3f9f5ca

@celestinoxp
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dr-m I can't find a MariaDB 10.11.x snapshot for Windows at https://ci.mariadb.org/
Are the Windows builds failing?

@dr-m
Copy link
Contributor Author

@dr-m dr-m commented on 3f9f5ca Apr 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You’d better ask such questions on https://mariadb.zulipchat.com in the "buildbot" stream, or in one of the mailing lists listed at https://mariadb.org/contribute/. Tests on Windows are part of the main branch protection, so they should not fail, but I don’t think that any packages for Windows are being made available via that interface. I am not familiar with the release process.

Please sign in to comment.