Skip to content

Commit 0cd2b4c

Browse files
committed
MDEV-22177 more fsync() -> fdatasync() in InnoDB
Replace all fsync() calls with fdatasync() where possible (e.g. on Linux). InnoDB doesn't care about file timestamps, so to achieve better performance it makes sense to use fdatasync() everywhere. file_io::flush(): renamed from flush_data_only(). os_file_flush_data(): removed. os_file_sync_posix(): renamed from os_file_fsync_posix(); it now uses fdatasync() when it's available.
1 parent 9bd98f4 commit 0cd2b4c

File tree

6 files changed

+57
-109
lines changed

6 files changed

+57
-109
lines changed

storage/innobase/handler/ha_innodb.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18566,7 +18566,7 @@ checkpoint_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
1856618566
+ SIZE_OF_FILE_CHECKPOINT
1856718567
< log_sys.get_lsn()) {
1856818568
log_make_checkpoint();
18569-
log_sys.log.flush_data_only();
18569+
log_sys.log.flush();
1857018570
}
1857118571

1857218572
dberr_t err = fil_write_flushed_lsn(log_sys.get_lsn());

storage/innobase/include/log0log.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -442,9 +442,9 @@ class file_io
442442
virtual dberr_t read(os_offset_t offset, span<byte> buf) noexcept= 0;
443443
virtual dberr_t write(const char *path, os_offset_t offset,
444444
span<const byte> buf) noexcept= 0;
445-
virtual dberr_t flush_data_only() noexcept= 0;
445+
virtual dberr_t flush() noexcept= 0;
446446

447-
/** Durable writes doesn't require calling flush_data_only() */
447+
/** Durable writes don't require calling flush() */
448448
bool writes_are_durable() const noexcept { return m_durable_writes; }
449449

450450
protected:
@@ -468,7 +468,7 @@ class file_os_io final: public file_io
468468
dberr_t read(os_offset_t offset, span<byte> buf) noexcept final;
469469
dberr_t write(const char *path, os_offset_t offset,
470470
span<const byte> buf) noexcept final;
471-
dberr_t flush_data_only() noexcept final;
471+
dberr_t flush() noexcept final;
472472

473473
private:
474474
pfs_os_file_t m_fd{OS_FILE_CLOSED};
@@ -490,7 +490,7 @@ class log_file_t
490490
dberr_t read(os_offset_t offset, span<byte> buf) noexcept;
491491
bool writes_are_durable() const noexcept;
492492
dberr_t write(os_offset_t offset, span<const byte> buf) noexcept;
493-
dberr_t flush_data_only() noexcept;
493+
dberr_t flush() noexcept;
494494

495495
private:
496496
std::unique_ptr<file_io> m_file;
@@ -596,14 +596,14 @@ struct log_t{
596596
@param[in] offset offset in log file
597597
@param[in] buf buffer where to read */
598598
void read(os_offset_t offset, span<byte> buf);
599-
/** Tells whether writes require calling flush_data_only() */
599+
/** Tells whether writes are durable, i.e. do not require calling flush() */
600600
bool writes_are_durable() const noexcept;
601601
/** writes buffer to log file
602602
@param[in] offset offset in log file
603603
@param[in] buf buffer from which to write */
604604
void write(os_offset_t offset, span<byte> buf);
605605
/** flushes OS page cache (excluding metadata!) for log file */
606-
void flush_data_only();
606+
void flush();
607607
/** closes log file */
608608
void close_file();
609609

storage/innobase/include/os0file.h

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -744,9 +744,6 @@ The wrapper functions have the prefix of "innodb_". */
744744
# define os_file_flush(file) \
745745
pfs_os_file_flush_func(file, __FILE__, __LINE__)
746746

747-
#define os_file_flush_data(file) \
748-
pfs_os_file_flush_data_func(file, __FILE__, __LINE__)
749-
750747
# define os_file_rename(key, oldpath, newpath) \
751748
pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
752749

@@ -992,17 +989,6 @@ pfs_os_file_flush_func(
992989
const char* src_file,
993990
uint src_line);
994991

995-
/** NOTE! Please use the corresponding macro os_file_flush_data(), not directly
996-
this function!
997-
This is the performance schema instrumented wrapper function for
998-
os_file_flush_data() which flushes only(!) data (excluding metadata) from OS
999-
page cache of a given file to the disk.
1000-
@param[in] file Open file handle
1001-
@param[in] src_file file name where func invoked
1002-
@param[in] src_line line where the func invoked
1003-
@return true if success */
1004-
bool pfs_os_file_flush_data_func(pfs_os_file_t file, const char *src_file,
1005-
uint src_line);
1006992

1007993
/** NOTE! Please use the corresponding macro os_file_rename(), not directly
1008994
this function!
@@ -1098,8 +1084,6 @@ to original un-instrumented file I/O APIs */
10981084

10991085
# define os_file_flush(file) os_file_flush_func(file)
11001086

1101-
#define os_file_flush_data(file) os_file_flush_data_func(file)
1102-
11031087
# define os_file_rename(key, oldpath, newpath) \
11041088
os_file_rename_func(oldpath, newpath)
11051089

@@ -1179,14 +1163,6 @@ bool
11791163
os_file_flush_func(
11801164
os_file_t file);
11811165

1182-
/** NOTE! Use the corresponding macro os_file_flush_data(), not directly this
1183-
function!
1184-
Flushes only(!) data (excluding metadata) from OS page cache of a given file to
1185-
the disk.
1186-
@param[in] file handle to a file
1187-
@return true if success */
1188-
bool os_file_flush_data_func(os_file_t file);
1189-
11901166
/** Retrieves the last error number if an error occurs in a file io function.
11911167
The number should be retrieved before any other OS calls (because they may
11921168
overwrite the error number). If the number is not known to this program,

storage/innobase/log/log0log.cc

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -633,9 +633,9 @@ dberr_t file_os_io::write(const char *path, os_offset_t offset,
633633
buf.size());
634634
}
635635

636-
dberr_t file_os_io::flush_data_only() noexcept
636+
dberr_t file_os_io::flush() noexcept
637637
{
638-
return os_file_flush_data(m_fd) ? DB_SUCCESS : DB_ERROR;
638+
return os_file_flush(m_fd) ? DB_SUCCESS : DB_ERROR;
639639
}
640640

641641
#ifdef HAVE_PMEM
@@ -674,7 +674,7 @@ class file_pmem_io final : public file_io
674674
pmem_memcpy_persist(m_file.data() + offset, buf.data(), buf.size());
675675
return DB_SUCCESS;
676676
}
677-
dberr_t flush_data_only() noexcept final
677+
dberr_t flush() noexcept final
678678
{
679679
ut_ad(0);
680680
return DB_SUCCESS;
@@ -746,10 +746,10 @@ dberr_t log_file_t::write(os_offset_t offset, span<const byte> buf) noexcept
746746
return m_file->write(m_path.c_str(), offset, buf);
747747
}
748748

749-
dberr_t log_file_t::flush_data_only() noexcept
749+
dberr_t log_file_t::flush() noexcept
750750
{
751751
ut_ad(is_opened());
752-
return m_file->flush_data_only();
752+
return m_file->flush();
753753
}
754754

755755
void log_t::file::open_file(std::string path)
@@ -788,7 +788,7 @@ void log_t::file::write_header_durable(lsn_t lsn)
788788

789789
log_sys.log.write(0, buf);
790790
if (!log_sys.log.writes_are_durable())
791-
log_sys.log.flush_data_only();
791+
log_sys.log.flush();
792792
}
793793

794794
void log_t::file::read(os_offset_t offset, span<byte> buf)
@@ -813,11 +813,11 @@ void log_t::file::write(os_offset_t offset, span<byte> buf)
813813
log_sys.n_log_ios++;
814814
}
815815

816-
void log_t::file::flush_data_only()
816+
void log_t::file::flush()
817817
{
818818
log_sys.pending_flushes.fetch_add(1, std::memory_order_acquire);
819-
if (const dberr_t err= fd.flush_data_only())
820-
ib::fatal() << "flush_data_only(" << fd.get_path() << ") returned " << err;
819+
if (const dberr_t err= fd.flush())
820+
ib::fatal() << "flush(" << fd.get_path() << ") returned " << err;
821821
log_sys.pending_flushes.fetch_sub(1, std::memory_order_release);
822822
log_sys.flushes.fetch_add(1, std::memory_order_release);
823823
}
@@ -936,7 +936,7 @@ log_write_buf(
936936
and invoke log_mutex_enter(). */
937937
static void log_write_flush_to_disk_low(lsn_t lsn)
938938
{
939-
log_sys.log.flush_data_only();
939+
log_sys.log.flush();
940940
ut_a(lsn >= log_sys.get_flushed_lsn());
941941
log_sys.set_flushed_lsn(lsn);
942942
}
@@ -1294,7 +1294,7 @@ void log_write_checkpoint_info(lsn_t end_lsn)
12941294
: LOG_CHECKPOINT_1,
12951295
{buf, OS_FILE_LOG_BLOCK_SIZE});
12961296

1297-
log_sys.log.flush_data_only();
1297+
log_sys.log.flush();
12981298

12991299
log_mutex_enter();
13001300

@@ -1742,7 +1742,7 @@ logs_empty_and_mark_files_at_shutdown(void)
17421742

17431743
/* Ensure that all buffered changes are written to the
17441744
redo log before fil_close_all_files(). */
1745-
log_sys.log.flush_data_only();
1745+
log_sys.log.flush();
17461746
} else {
17471747
lsn = recv_sys.recovered_lsn;
17481748
}

storage/innobase/os/os0file.cc

Lines changed: 37 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ Created 10/21/1995 Heikki Tuuri
7878
#include <my_sys.h>
7979
#endif
8080

81+
#include <thread>
82+
#include <chrono>
83+
8184
/* Per-IO operation environment*/
8285
class io_slots
8386
{
@@ -879,55 +882,53 @@ os_file_get_last_error_low(
879882
return(OS_FILE_ERROR_MAX + err);
880883
}
881884

882-
/** Wrapper to fsync(2) that retries the call on some errors.
885+
/** Wrapper to fsync() or fdatasync() that retries the call on some errors.
883886
Returns the value 0 if successful; otherwise the value -1 is returned and
884887
the global variable errno is set to indicate the error.
885888
@param[in] file open file handle
886889
@return 0 if success, -1 otherwise */
887-
static
888-
int
889-
os_file_fsync_posix(
890-
os_file_t file)
890+
static int os_file_sync_posix(os_file_t file)
891891
{
892-
ulint failures = 0;
893-
894-
for (;;) {
895-
896-
++os_n_fsyncs;
897-
898-
int ret = fsync(file);
892+
#if !defined(HAVE_FDATASYNC) || HAVE_DECL_FDATASYNC == 0
893+
auto func= fsync;
894+
auto func_name= "fsync()";
895+
#else
896+
auto func= fdatasync;
897+
auto func_name= "fdatasync()";
898+
#endif
899899

900-
if (ret == 0) {
901-
return(ret);
902-
}
900+
ulint failures= 0;
903901

904-
switch(errno) {
905-
case ENOLCK:
902+
for (;;)
903+
{
904+
++os_n_fsyncs;
906905

907-
++failures;
908-
ut_a(failures < 1000);
906+
int ret= func(file);
909907

910-
if (!(failures % 100)) {
908+
if (ret == 0)
909+
return ret;
911910

912-
ib::warn()
913-
<< "fsync(): "
914-
<< "No locks available; retrying";
915-
}
911+
switch (errno)
912+
{
913+
case ENOLCK:
914+
++failures;
915+
ut_a(failures < 1000);
916916

917-
/* 0.2 sec */
918-
os_thread_sleep(200000);
919-
break;
917+
if (!(failures % 100))
918+
ib::warn() << func_name << ": No locks available; retrying";
920919

921-
case EINTR:
920+
std::this_thread::sleep_for(std::chrono::milliseconds(200));
921+
break;
922922

923-
++failures;
924-
ut_a(failures < 2000);
925-
break;
923+
case EINTR:
924+
++failures;
925+
ut_a(failures < 2000);
926+
break;
926927

927-
default:
928-
ib::fatal() << "fsync() returned " << errno;
929-
}
930-
}
928+
default:
929+
ib::fatal() << func_name << " returned " << errno;
930+
}
931+
}
931932
}
932933

933934
/** Check the existence and type of the given file.
@@ -988,7 +989,7 @@ os_file_flush_func(
988989
int ret;
989990

990991
WAIT_ALLOW_WRITES();
991-
ret = os_file_fsync_posix(file);
992+
ret = os_file_sync_posix(file);
992993

993994
if (ret == 0) {
994995
return(true);
@@ -4604,32 +4605,3 @@ os_normalize_path(
46044605
}
46054606
}
46064607
}
4607-
4608-
bool os_file_flush_data_func(os_file_t file) {
4609-
#if defined(_WIN32) || !defined(HAVE_FDATASYNC) || HAVE_DECL_FDATASYNC == 0
4610-
return os_file_flush_func(file);
4611-
#else
4612-
bool success= fdatasync(file) != -1;
4613-
if (!success) {
4614-
ib::error() << "fdatasync() errno: " << errno;
4615-
}
4616-
return success;
4617-
#endif
4618-
}
4619-
4620-
#ifdef UNIV_PFS_IO
4621-
bool pfs_os_file_flush_data_func(pfs_os_file_t file, const char *src_file,
4622-
uint src_line)
4623-
{
4624-
PSI_file_locker_state state;
4625-
struct PSI_file_locker *locker= NULL;
4626-
4627-
register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC, src_file,
4628-
src_line);
4629-
4630-
bool success= os_file_flush_data_func(file);
4631-
4632-
register_pfs_file_io_end(locker, 0);
4633-
return success;
4634-
}
4635-
#endif

storage/innobase/srv/srv0start.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1033,7 +1033,7 @@ static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists)
10331033

10341034
if (flushed_lsn != log_sys.get_flushed_lsn()) {
10351035
log_write_up_to(flushed_lsn, false);
1036-
log_sys.log.flush_data_only();
1036+
log_sys.log.flush();
10371037
}
10381038

10391039
ut_ad(flushed_lsn == log_get_lsn());

0 commit comments

Comments
 (0)