Skip to content

Commit f4d7545

Browse files
committed
MDEV-33379 innodb_log_file_buffering=OFF causes corruption on bcachefs
Apparently, invoking fcntl(fd, F_SETFL, O_DIRECT) will lead to unexpected behaviour on Linux bcachefs and possibly other file systems, depending on the operating system version. So, let us avoid doing that, and instead just attempt to pass the O_DIRECT flag to open(). This should make us compatible with NetBSD, IBM AIX, as well as Solaris and its derivatives. We will only implement innodb_log_file_buffering=OFF on systems where we can determine the physical block size (typically 512 or 4096 bytes). Currently, those operating systems are Linux and Microsoft Windows. HAVE_FCNTL_DIRECT, os_file_set_nocache(): Remove. OS_FILE_OVERWRITE, OS_FILE_CREATE_PATH: Remove (never used parameters). os_file_log_buffered(), os_file_log_maybe_unbuffered(): Helper functions. os_file_create_func(): When applicable, initially attempt to open files in O_DIRECT mode. For type==OS_LOG_FILE && create_mode != OS_FILE_CREATE we will first invoke stat(2) on the file name to find out if the size is compatible with O_DIRECT. If create_mode == OS_FILE_CREATE, we will invoke fstat(2) on the created log file afterwards, and may close and reopen the file in O_DIRECT mode if applicable. create_temp_file(): Support O_DIRECT. This is only used if O_TMPFILE is available and innodb_disable_sort_file_cache=ON (non-default value). Notably, that setting never worked on Microsoft Windows. row_merge_file_create_mode(): Split from row_merge_file_create_low(). Create a temporary file in the specified mode. Reviewed by: Vladislav Vaintroub
1 parent d73baa4 commit f4d7545

File tree

13 files changed

+181
-234
lines changed

13 files changed

+181
-234
lines changed

cmake/os/AIX.cmake

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,5 @@ ELSE()
3434
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -maix64 -pthread -mcmodel=large")
3535
ENDIF()
3636

37-
# fcntl(fd, F_SETFL, O_DIRECT) is not supported; O_DIRECT is an open(2) flag
38-
SET(HAVE_FCNTL_DIRECT 0 CACHE INTERNAL "")
39-
4037
# make it WARN by default, not AUTO (that implies -Werror)
4138
SET(MYSQL_MAINTAINER_MODE "WARN" CACHE STRING "Enable MariaDB maintainer-specific warnings. One of: NO (warnings are disabled) WARN (warnings are enabled) ERR (warnings are errors) AUTO (warnings are errors in Debug only)")

cmake/os/SunOS.cmake

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,6 @@ INCLUDE(CheckSymbolExists)
1717
INCLUDE(CheckCSourceRuns)
1818
INCLUDE(CheckCSourceCompiles)
1919

20-
# fcntl(fd, F_SETFL, O_DIRECT) is not supported,
21-
# and directio(3C) would only work on UFS or NFS, not ZFS.
22-
SET(HAVE_FCNTL_DIRECT 0 CACHE INTERNAL "")
23-
2420
# Enable 64 bit file offsets
2521
SET(_FILE_OFFSET_BITS 64)
2622

cmake/os/WindowsCache.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ SET(HAVE_EXECINFO_H CACHE INTERNAL "")
4444
SET(HAVE_FCHMOD CACHE INTERNAL "")
4545
SET(HAVE_FCNTL CACHE INTERNAL "")
4646
SET(HAVE_FCNTL_H 1 CACHE INTERNAL "")
47-
SET(HAVE_FCNTL_DIRECT 0 CACHE INTERNAL "")
4847
SET(HAVE_FCNTL_NONBLOCK CACHE INTERNAL "")
4948
SET(HAVE_FDATASYNC CACHE INTERNAL "")
5049
SET(HAVE_DECL_FDATASYNC CACHE INTERNAL "")

config.h.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
#cmakedefine HAVE_DLFCN_H 1
3131
#cmakedefine HAVE_EXECINFO_H 1
3232
#cmakedefine HAVE_FCNTL_H 1
33-
#cmakedefine HAVE_FCNTL_DIRECT 1
3433
#cmakedefine HAVE_FENV_H 1
3534
#cmakedefine HAVE_FLOAT_H 1
3635
#cmakedefine HAVE_FNMATCH_H 1

configure.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,6 @@ CHECK_SYMBOL_EXISTS(O_NONBLOCK "unistd.h;fcntl.h" HAVE_FCNTL_NONBLOCK)
706706
IF(NOT HAVE_FCNTL_NONBLOCK)
707707
SET(NO_FCNTL_NONBLOCK 1)
708708
ENDIF()
709-
CHECK_SYMBOL_EXISTS(O_DIRECT "fcntl.h" HAVE_FCNTL_DIRECT)
710709

711710
#
712711
# Test for how the C compiler does inline, if at all

extra/mariabackup/xtrabackup.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2419,12 +2419,12 @@ static bool innodb_init()
24192419
os_file_delete_if_exists_func(ib_logfile0.c_str(), nullptr);
24202420
os_file_t file= os_file_create_func(ib_logfile0.c_str(),
24212421
OS_FILE_CREATE, OS_FILE_NORMAL,
2422-
#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
2422+
#if defined _WIN32 || defined O_DIRECT
24232423
OS_DATA_FILE_NO_O_DIRECT,
24242424
#else
24252425
OS_DATA_FILE,
24262426
#endif
2427-
false, &ret);
2427+
false, &ret);
24282428
if (!ret)
24292429
{
24302430
invalid_log:

mysys/mf_tempfile.c

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,11 @@ File create_temp_file(char *to, const char *dir, const char *prefix,
6666

6767
DBUG_ENTER("create_temp_file");
6868
DBUG_PRINT("enter", ("dir: %s, prefix: %s", dir ? dir : "(null)", prefix));
69+
#if !defined _WIN32 && defined O_DIRECT
70+
DBUG_ASSERT((mode & (O_EXCL | O_TRUNC | O_CREAT | O_RDWR | O_DIRECT)) == 0);
71+
#else
6972
DBUG_ASSERT((mode & (O_EXCL | O_TRUNC | O_CREAT | O_RDWR)) == 0);
73+
#endif
7074

7175
mode|= O_TRUNC | O_CREAT | O_RDWR; /* not O_EXCL, see Windows code below */
7276

@@ -118,16 +122,41 @@ File create_temp_file(char *to, const char *dir, const char *prefix,
118122

119123
if ((MyFlags & MY_TEMPORARY) && O_TMPFILE_works)
120124
{
121-
/* explictly don't use O_EXCL here has it has a different
122-
meaning with O_TMPFILE
125+
/*
126+
explicitly don't use O_EXCL here has it has a different
127+
meaning with O_TMPFILE
123128
*/
124-
if ((file= open(dir, (mode & ~O_CREAT) | O_TMPFILE | O_CLOEXEC,
125-
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) >= 0)
129+
const int flags= (mode & ~O_CREAT) | O_TMPFILE | O_CLOEXEC;
130+
const mode_t open_mode= S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
131+
# ifdef O_DIRECT
132+
static int O_TMPFILE_works_with_O_DIRECT= O_DIRECT;
133+
const int try_O_DIRECT= mode & O_TMPFILE_works_with_O_DIRECT;
134+
if (try_O_DIRECT)
135+
file= open(dir, flags | O_DIRECT, open_mode);
136+
else
137+
# endif
138+
file= open(dir, flags, open_mode);
139+
140+
if (file >= 0)
126141
{
142+
# ifdef O_DIRECT
143+
O_TMPFILE_works:
144+
# endif
127145
my_snprintf(to, FN_REFLEN, "%s/#sql/fd=%d", dir, file);
128146
file=my_register_filename(file, to, FILE_BY_O_TMPFILE,
129147
EE_CANTCREATEFILE, MyFlags);
130148
}
149+
# ifdef O_DIRECT
150+
else if (errno == EINVAL && try_O_DIRECT)
151+
{
152+
file= open(dir, flags, open_mode);
153+
if (file >= 0)
154+
{
155+
O_TMPFILE_works_with_O_DIRECT= 0;
156+
goto O_TMPFILE_works;
157+
}
158+
}
159+
# endif
131160
else if (errno == EOPNOTSUPP || errno == EINVAL)
132161
{
133162
my_printf_error(EE_CANTCREATEFILE, "O_TMPFILE is not supported on %s "

storage/innobase/fil/fil0fil.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ static bool fil_node_open_file_low(fil_node_t *node)
342342
ut_ad(node->space->is_closing());
343343
mysql_mutex_assert_owner(&fil_system.mutex);
344344
static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096, "compatibility");
345-
#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
345+
#if defined _WIN32 || defined O_DIRECT
346346
ulint type;
347347
switch (FSP_FLAGS_GET_ZIP_SSIZE(node->space->flags)) {
348348
case 1:
@@ -1395,10 +1395,12 @@ ATTRIBUTE_COLD void fil_space_t::reopen_all()
13951395

13961396
ulint type= OS_DATA_FILE;
13971397

1398+
#if defined _WIN32 || defined O_DIRECT
13981399
switch (FSP_FLAGS_GET_ZIP_SSIZE(space.flags)) {
13991400
case 1: case 2:
14001401
type= OS_DATA_FILE_NO_O_DIRECT;
14011402
}
1403+
#endif
14021404

14031405
for (ulint count= 10000; count--;)
14041406
{
@@ -2019,7 +2021,7 @@ fil_ibd_create(
20192021

20202022
static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096,
20212023
"compatibility");
2022-
#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
2024+
#if defined _WIN32 || defined O_DIRECT
20232025
ulint type;
20242026
switch (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
20252027
case 1:

storage/innobase/handler/ha_innodb.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3973,7 +3973,7 @@ static int innodb_init_params()
39733973
} else if (innodb_flush_method >= 4 /* O_DIRECT */
39743974
IF_WIN(&& innodb_flush_method < 8 /* normal */,)) {
39753975
/* O_DIRECT and similar settings do nothing */
3976-
#ifdef HAVE_FCNTL_DIRECT
3976+
#ifdef O_DIRECT
39773977
} else if (srv_use_atomic_writes && my_may_have_atomic_write) {
39783978
/* If atomic writes are enabled, do the same as with
39793979
innodb_flush_method=O_DIRECT: retain the default settings */

storage/innobase/include/os0file.h

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,8 @@ enum os_file_create_t {
113113
doesn't exist, error) */
114114
OS_FILE_CREATE, /*!< to create new file (if
115115
exists, error) */
116-
OS_FILE_OVERWRITE, /*!< to create a new file, if exists
117-
the overwrite old file */
118116
OS_FILE_OPEN_RAW, /*!< to open a raw device or disk
119117
partition */
120-
OS_FILE_CREATE_PATH, /*!< to create the directories */
121118
OS_FILE_OPEN_RETRY, /*!< open with retry */
122119

123120
/** Flags that can be combined with the above values. Please ensure
@@ -144,7 +141,7 @@ static const ulint OS_FILE_NORMAL = 62;
144141
/** Types for file create @{ */
145142
static constexpr ulint OS_DATA_FILE = 100;
146143
static constexpr ulint OS_LOG_FILE = 101;
147-
#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
144+
#if defined _WIN32 || defined O_DIRECT
148145
static constexpr ulint OS_DATA_FILE_NO_O_DIRECT = 103;
149146
#endif
150147
/* @} */
@@ -375,22 +372,6 @@ os_file_create_simple_no_error_handling_func(
375372
bool* success)
376373
MY_ATTRIBUTE((warn_unused_result));
377374

378-
#ifndef HAVE_FCNTL_DIRECT
379-
#define os_file_set_nocache(fd, file_name, operation_name) do{}while(0)
380-
#else
381-
/** Tries to disable OS caching on an opened file descriptor.
382-
@param[in] fd file descriptor to alter
383-
@param[in] file_name file name, used in the diagnostic message
384-
@param[in] name "open" or "create"; used in the diagnostic
385-
message */
386-
void
387-
os_file_set_nocache(
388-
/*================*/
389-
int fd, /*!< in: file descriptor to alter */
390-
const char* file_name,
391-
const char* operation_name);
392-
#endif
393-
394375
#ifndef _WIN32 /* On Microsoft Windows, mandatory locking is used */
395376
/** Obtain an exclusive lock on a file.
396377
@param fd file descriptor

0 commit comments

Comments
 (0)