From 1006fb7efafe634165214f065dd8cff05a5d10c0 Mon Sep 17 00:00:00 2001 From: Filipe David Manana Date: Thu, 22 Apr 2010 23:40:26 +0100 Subject: [PATCH] Added new function: file:datasync/1. This function invokes the POSIX system call "int fdatasync(int fd)". This system call is similar to "fsync" but, unlike fsync, it does not update the metadata associated with the file (like the access time for example). It's used by many DBMSs (MySQL and SQLite of example) to increase disk IO performance, as it avoids disk seeks and disk write operations compared to fsync. More details on it at: http://linux.die.net/man/2/fdatasync An example, from the MySQL source: http://bazaar.launchpad.net/~mysql/mysql-server/mysql-5.1-telco-6.1/annotate/head%3A/mysys/my_sync.c#L61 This new function just calls fsync on systems not implementing fdatasync. --- erts/configure.in | 6 +++++ erts/emulator/drivers/common/efile_drv.c | 23 +++++++++++++++++ erts/emulator/drivers/common/erl_efile.h | 1 + erts/emulator/drivers/common/ram_file_drv.c | 8 ++++++ erts/emulator/drivers/unix/unix_efile.c | 11 ++++++++ erts/emulator/drivers/win32/win_efile.c | 9 +++++++ erts/preloaded/src/prim_file.erl | 6 ++++- lib/kernel/doc/src/file.xml | 27 ++++++++++++++++++++ lib/kernel/src/file.erl | 12 ++++++++- lib/kernel/src/file_io_server.erl | 8 ++++++ lib/kernel/src/ram_file.erl | 5 +++- lib/kernel/test/file_SUITE.erl | 28 +++++++++++++++++++-- lib/kernel/test/prim_file_SUITE.erl | 22 ++++++++++++++-- 13 files changed, 159 insertions(+), 7 deletions(-) diff --git a/erts/configure.in b/erts/configure.in index e5202b75cba4..7767cd59da66 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -1757,6 +1757,12 @@ fi dnl Need by run_erl. AC_CHECK_FUNCS([openpty]) +dnl fdatasync syscall (Unix only) +AC_CHECK_FUNCS([fdatasync]) + +dnl Find which C libraries are required to use fdatasync +AC_SEARCH_LIBS(fdatasync, [rt]) + dnl ---------------------------------------------------------------------- dnl Checks for features/quirks in the system that affects Erlang. dnl ---------------------------------------------------------------------- diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c index cc4421b49c99..dc5d219d90a4 100644 --- a/erts/emulator/drivers/common/efile_drv.c +++ b/erts/emulator/drivers/common/efile_drv.c @@ -53,6 +53,7 @@ #define FILE_IPREAD 27 #define FILE_ALTNAME 28 #define FILE_READ_LINE 29 +#define FILE_FDATASYNC 30 /* Return codes */ @@ -883,6 +884,15 @@ static void invoke_chdir(void *data) invoke_name(data, efile_chdir); } +static void invoke_fdatasync(void *data) +{ + struct t_data *d = (struct t_data *) data; + int fd = (int) d->fd; + + d->again = 0; + d->result_ok = efile_fdatasync(&d->errInfo, fd); +} + static void invoke_fsync(void *data) { struct t_data *d = (struct t_data *) data; @@ -1919,6 +1929,7 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) case FILE_RMDIR: case FILE_CHDIR: case FILE_DELETE: + case FILE_FDATASYNC: case FILE_FSYNC: case FILE_TRUNCATE: case FILE_LINK: @@ -2209,6 +2220,18 @@ file_output(ErlDrvData e, char* buf, int count) goto done; } + case FILE_FDATASYNC: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data)); + + d->fd = fd; + d->command = command; + d->invoke = invoke_fdatasync; + d->free = free_data; + d->level = 2; + goto done; + } + case FILE_FSYNC: { d = EF_SAFE_ALLOC(sizeof(struct t_data)); diff --git a/erts/emulator/drivers/common/erl_efile.h b/erts/emulator/drivers/common/erl_efile.h index 9aa941e55068..6821a0e2ee16 100644 --- a/erts/emulator/drivers/common/erl_efile.h +++ b/erts/emulator/drivers/common/erl_efile.h @@ -126,6 +126,7 @@ int efile_readdir(Efile_error* errInfo, char* name, int efile_openfile(Efile_error* errInfo, char* name, int flags, int* pfd, Sint64* pSize); void efile_closefile(int fd); +int efile_fdatasync(Efile_error* errInfo, int fd); int efile_fsync(Efile_error* errInfo, int fd); int efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, char *name, int info_for_link); diff --git a/erts/emulator/drivers/common/ram_file_drv.c b/erts/emulator/drivers/common/ram_file_drv.c index 4a39a156e6bb..d4e547ade638 100644 --- a/erts/emulator/drivers/common/ram_file_drv.c +++ b/erts/emulator/drivers/common/ram_file_drv.c @@ -35,6 +35,7 @@ #define RAM_FILE_TRUNCATE 14 #define RAM_FILE_PREAD 17 #define RAM_FILE_PWRITE 18 +#define RAM_FILE_FDATASYNC 19 /* other operations */ #define RAM_FILE_GET 30 @@ -558,6 +559,13 @@ static void rfile_command(ErlDrvData e, char* buf, int count) numeric_reply(f, 0); /* 0 is not used */ break; + case RAM_FILE_FDATASYNC: + if (f->flags == 0) + error_reply(f, EBADF); + else + reply(f, 1, 0); + break; + case RAM_FILE_FSYNC: if (f->flags == 0) error_reply(f, EBADF); diff --git a/erts/emulator/drivers/unix/unix_efile.c b/erts/emulator/drivers/unix/unix_efile.c index d395b6869107..82e8170510aa 100644 --- a/erts/emulator/drivers/unix/unix_efile.c +++ b/erts/emulator/drivers/unix/unix_efile.c @@ -818,6 +818,17 @@ efile_closefile(int fd) close(fd); } +int +efile_fdatasync(Efile_error *errInfo, /* Where to return error codes. */ + int fd) /* File descriptor for file to sync data. */ +{ +#ifdef HAVE_FDATASYNC + return check_error(fdatasync(fd), errInfo); +#else + return efile_fsync(errInfo, fd); +#endif +} + int efile_fsync(Efile_error *errInfo, /* Where to return error codes. */ int fd) /* File descriptor for file to sync. */ diff --git a/erts/emulator/drivers/win32/win_efile.c b/erts/emulator/drivers/win32/win_efile.c index 89aaad31da1a..cdf3bb424cdd 100644 --- a/erts/emulator/drivers/win32/win_efile.c +++ b/erts/emulator/drivers/win32/win_efile.c @@ -763,6 +763,15 @@ int fd; /* File descriptor for file to close. */ CloseHandle((HANDLE) fd); } +int +efile_fdatasync(errInfo, fd) +Efile_error* errInfo; /* Where to return error codes. */ +int fd; /* File descriptor for file to sync. */ +{ + /* Not available in Windows, just call regular fsync */ + return efile_fsync(errInfo, fd); +} + int efile_fsync(errInfo, fd) Efile_error* errInfo; /* Where to return error codes. */ diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 43e6f6cd886b..53c741e66e50 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -25,7 +25,7 @@ %%% Interface towards a single file's contents. Uses ?FD_DRV. %% Generic file contents operations --export([open/2, close/1, sync/1, position/2, truncate/1, +-export([open/2, close/1, datasync/1, sync/1, position/2, truncate/1, write/2, pwrite/2, pwrite/3, read/2, read_line/1, pread/2, pread/3, copy/3]). %% Specialized file operations @@ -96,6 +96,7 @@ -define(FILE_IPREAD, 27). -define(FILE_ALTNAME, 28). -define(FILE_READ_LINE, 29). +-define(FILE_FDATASYNC, 30). %% Driver responses -define(FILE_RESP_OK, 0). @@ -292,6 +293,9 @@ pwrite(#file_descriptor{module = ?MODULE}, _, _) -> {error, badarg}. +%% Returns {error, Reason} | ok. +datasync(#file_descriptor{module = ?MODULE, data = {Port, _}}) -> + drv_command(Port, [?FILE_FDATASYNC]). %% Returns {error, Reason} | ok. sync(#file_descriptor{module = ?MODULE, data = {Port, _}}) -> diff --git a/lib/kernel/doc/src/file.xml b/lib/kernel/doc/src/file.xml index 50f9722a1c52..c94df62d1f49 100644 --- a/lib/kernel/doc/src/file.xml +++ b/lib/kernel/doc/src/file.xml @@ -1640,6 +1640,33 @@ f.txt: {person, "kalle", 25}. + + datasync(IoDevice) -> ok | {error, Reason} + Synchronizes the in-memory data of a file, ignoring most of its metadata, with that on the physical medium + + IoDevice = io_device() + Reason = ext_posix() | terminated + + +

Makes sure that any buffers kept by the operating system + (not by the Erlang runtime system) are written to disk. In + many ways it's resembles fsync but it not requires to update + some of file's metadata such as the access time. On + some platforms, this function might have no effect.

+

Applications that access databases or log files often write + a tiny data fragment (e.g., one line in a log file) and then + call fsync() immediately in order to ensure that the written + data is physically stored on the harddisk. Unfortunately, fsync() + will always initiate two write operations: one for the newly + written data and another one in order to update the modification + time stored in the inode. If the modification time is not a part + of the transaction concept fdatasync() can be used to avoid + unnecessary inode disk write operations.

+

Available only in some POSIX systems. This call results in a + call to fsync(), or has no effect, in systems not implementing + the fdatasync syscall.

+
+
truncate(IoDevice) -> ok | {error, Reason} Truncate a file diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl index 46ffa9d70893..5da359fdf17f 100644 --- a/lib/kernel/src/file.erl +++ b/lib/kernel/src/file.erl @@ -40,7 +40,7 @@ read/2, write/2, pread/2, pread/3, pwrite/2, pwrite/3, read_line/1, - position/2, truncate/1, sync/1, + position/2, truncate/1, datasync/1, sync/1, copy/2, copy/3]). %% High level operations -export([consult/1, path_consult/2]). @@ -472,6 +472,16 @@ pwrite(#file_descriptor{module = Module} = Handle, Offs, Bytes) -> pwrite(_, _, _) -> {error, badarg}. +-spec datasync(File :: io_device()) -> 'ok' | {'error', posix()}. + +datasync(File) when is_pid(File) -> + R = file_request(File, datasync), + wait_file_reply(File, R); +datasync(#file_descriptor{module = Module} = Handle) -> + Module:datasync(Handle); +datasync(_) -> + {error, badarg}. + -spec sync(File :: io_device()) -> 'ok' | {'error', posix()}. sync(File) when is_pid(File) -> diff --git a/lib/kernel/src/file_io_server.erl b/lib/kernel/src/file_io_server.erl index 37e803c49375..425c8ec98305 100644 --- a/lib/kernel/src/file_io_server.erl +++ b/lib/kernel/src/file_io_server.erl @@ -220,6 +220,14 @@ file_request({pwrite,At,Data}, Reply -> std_reply(Reply, State) end; +file_request(datasync, + #state{handle=Handle}=State) -> + case ?PRIM_FILE:datasync(Handle) of + {error,_}=Reply -> + {stop,normal,Reply,State}; + Reply -> + {reply,Reply,State} + end; file_request(sync, #state{handle=Handle}=State) -> case ?PRIM_FILE:sync(Handle) of diff --git a/lib/kernel/src/ram_file.erl b/lib/kernel/src/ram_file.erl index d99665094878..0230cb4e0a94 100644 --- a/lib/kernel/src/ram_file.erl +++ b/lib/kernel/src/ram_file.erl @@ -24,7 +24,7 @@ -export([open/2, close/1]). -export([write/2, read/2, copy/3, pread/2, pread/3, pwrite/2, pwrite/3, - position/2, truncate/1, sync/1]). + position/2, truncate/1, datasync/1, sync/1]). %% Specialized file operations -export([get_size/1, get_file/1, set_file/2, get_file_close/1]). @@ -60,6 +60,7 @@ -define(RAM_FILE_TRUNCATE, 14). -define(RAM_FILE_PREAD, 17). -define(RAM_FILE_PWRITE, 18). +-define(RAM_FILE_FDATASYNC, 19). %% Other operations -define(RAM_FILE_GET, 30). @@ -167,6 +168,8 @@ copy(#file_descriptor{module = ?MODULE} = Source, %% XXX Should be moved down to the driver for optimization. file:copy_opened(Source, Dest, Length). +datasync(#file_descriptor{module = ?MODULE, data = Port}) -> + call_port(Port, <>). sync(#file_descriptor{module = ?MODULE, data = Port}) -> call_port(Port, <>). diff --git a/lib/kernel/test/file_SUITE.erl b/lib/kernel/test/file_SUITE.erl index d01e1f1fcfc5..fd06c5c0bb49 100644 --- a/lib/kernel/test/file_SUITE.erl +++ b/lib/kernel/test/file_SUITE.erl @@ -52,7 +52,7 @@ old_modes/1, new_modes/1, path_open/1, open_errors/1]). -export([file_info/1, file_info_basic_file/1, file_info_basic_directory/1, file_info_bad/1, file_info_times/1, file_write_file_info/1]). --export([rename/1, access/1, truncate/1, sync/1, +-export([rename/1, access/1, truncate/1, datasync/1, sync/1, read_write/1, pread_write/1, append/1]). -export([errors/1, e_delete/1, e_rename/1, e_make_dir/1, e_del_dir/1]). -export([otp_5814/1]). @@ -374,7 +374,7 @@ win_cur_dir_1(_Config) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -files(suite) -> [open,pos,file_info,consult,eval,script,truncate,sync]. +files(suite) -> [open,pos,file_info,consult,eval,script,truncate,sync,datasync]. open(suite) -> [open1,old_modes,new_modes,path_open,close,access,read_write, pread_write,append,open_errors]. @@ -1352,6 +1352,30 @@ truncate(Config) when is_list(Config) -> ok. +datasync(suite) -> []; +datasync(doc) -> "Tests that ?FILE_MODULE:datasync/1 at least doesn't crash."; +datasync(Config) when is_list(Config) -> + ?line Dog = test_server:timetrap(test_server:seconds(5)), + ?line PrivDir = ?config(priv_dir, Config), + ?line Sync = filename:join(PrivDir, + atom_to_list(?MODULE) + ++"_sync.fil"), + + %% Raw open. + ?line {ok, Fd} = ?FILE_MODULE:open(Sync, [write, raw]), + ?line ok = ?FILE_MODULE:datasync(Fd), + ?line ok = ?FILE_MODULE:close(Fd), + + %% Ordinary open. + ?line {ok, Fd2} = ?FILE_MODULE:open(Sync, [write]), + ?line ok = ?FILE_MODULE:datasync(Fd2), + ?line ok = ?FILE_MODULE:close(Fd2), + + ?line [] = flush(), + ?line test_server:timetrap_cancel(Dog), + ok. + + sync(suite) -> []; sync(doc) -> "Tests that ?FILE_MODULE:sync/1 at least doesn't crash."; sync(Config) when is_list(Config) -> diff --git a/lib/kernel/test/prim_file_SUITE.erl b/lib/kernel/test/prim_file_SUITE.erl index 860aeecbf489..9de33aec80d3 100644 --- a/lib/kernel/test/prim_file_SUITE.erl +++ b/lib/kernel/test/prim_file_SUITE.erl @@ -34,7 +34,7 @@ file_info_times_a/1, file_info_times_b/1, file_write_file_info_a/1, file_write_file_info_b/1]). -export([rename_a/1, rename_b/1, - access/1, truncate/1, sync/1, + access/1, truncate/1, datasync/1, sync/1, read_write/1, pread_write/1, append/1]). -export([errors/1, e_delete/1, e_rename/1, e_make_dir/1, e_del_dir/1]). @@ -377,7 +377,7 @@ win_cur_dir_1(_Config, Handle) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -files(suite) -> [open,pos,file_info,truncate,sync]. +files(suite) -> [open,pos,file_info,truncate,sync,datasync]. open(suite) -> [open1,modes,close,access,read_write, pread_write,append]. @@ -1061,6 +1061,24 @@ truncate(Config) when is_list(Config) -> ok. +datasync(suite) -> []; +datasync(doc) -> "Tests that ?PRIM_FILE:datasync/1 at least doesn't crash."; +datasync(Config) when is_list(Config) -> + ?line Dog = test_server:timetrap(test_server:seconds(5)), + ?line PrivDir = ?config(priv_dir, Config), + ?line Sync = filename:join(PrivDir, + atom_to_list(?MODULE) + ++"_sync.fil"), + + %% Raw open. + ?line {ok, Fd} = ?PRIM_FILE:open(Sync, [write]), + ?line ok = ?PRIM_FILE:datasync(Fd), + ?line ok = ?PRIM_FILE:close(Fd), + + ?line test_server:timetrap_cancel(Dog), + ok. + + sync(suite) -> []; sync(doc) -> "Tests that ?PRIM_FILE:sync/1 at least doesn't crash."; sync(Config) when is_list(Config) ->