Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FileStore: use pwritev instead of lseek+writev #7349

Merged
merged 4 commits into from Feb 9, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Expand Up @@ -33,6 +33,7 @@ CHECK_FUNCTION_EXISTS(posix_fallocate HAVE_POSIX_FALLOCATE)
CHECK_FUNCTION_EXISTS(syncfs HAVE_SYS_SYNCFS)
CHECK_FUNCTION_EXISTS(sync_file_range HAVE_SYNC_FILE_RANGE)
CHECK_FUNCTION_EXISTS(mallinfo HAVE_MALLINFO)
CHECK_FUNCTION_EXISTS(pwritev HAVE_PWRITEV)
CHECK_INCLUDE_FILES("arpa/inet.h" HAVE_ARPA_INET_H)
CHECK_INCLUDE_FILES("boost/random/discrete_distribution.hpp" HAVE_BOOST_RANDOM_DISCRETE_DISTRIBUTION)
CHECK_INCLUDE_FILES("dirent.h" HAVE_DIRENT_H)
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Expand Up @@ -1065,6 +1065,7 @@ AC_CHECK_HEADERS([sys/prctl.h])
AC_CHECK_FUNCS([prctl])
AC_CHECK_FUNCS([pipe2])
AC_CHECK_FUNCS([posix_fadvise])
AC_CHECK_FUNCS([pwritev], AC_DEFINE([HAVE_PWRITEV], [1], [we have pwritev]))

AC_MSG_CHECKING([for fdatasync])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
Expand Down
68 changes: 68 additions & 0 deletions src/common/buffer.cc
Expand Up @@ -1952,6 +1952,46 @@ int buffer::list::write_file(const char *fn, int mode)
return 0;
}

static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
ssize_t r = 0;
while (bytes > 0) {
#ifdef HAVE_PWRITEV
r = ::pwritev(fd, vec, veclen, offset);
#else
r = ::lseek64(fd, offset, SEEK_SET);
if (r != offset) {
r = -errno;
return r;
}
r = ::writev(fd, vec, veclen);
#endif
if (r < 0) {
if (errno == EINTR)
continue;
return -errno;
}

bytes -= r;
offset += r;
if (bytes == 0) break;

while (r > 0) {
if (vec[0].iov_len <= (size_t)r) {
// drain this whole item
r -= vec[0].iov_len;
++vec;
--veclen;
} else {
vec[0].iov_base = (char *)vec[0].iov_base + r;
vec[0].iov_len -= r;
break;
}
}
}
return 0;
}

int buffer::list::write_fd(int fd) const
{
if (can_zero_copy())
Expand Down Expand Up @@ -2007,6 +2047,34 @@ int buffer::list::write_fd(int fd) const
return 0;
}

int buffer::list::write_fd(int fd, uint64_t offset) const
{
iovec iov[IOV_MAX];

std::list<ptr>::const_iterator p = _buffers.begin();
uint64_t left_pbrs = _buffers.size();
while (left_pbrs) {
ssize_t bytes = 0;
unsigned iovlen = 0;
uint64_t size = MIN(left_pbrs, IOV_MAX);
left_pbrs -= size;
while (size > 0) {
iov[iovlen].iov_base = (void *)p->c_str();
iov[iovlen].iov_len = p->length();
iovlen++;
bytes += p->length();
++p;
size--;
}

int r = do_writev(fd, iov, offset, iovlen, bytes);
if (r < 0)
return r;
offset += bytes;
}
return 0;
}

void buffer::list::prepare_iov(std::vector<iovec> *piov) const
{
piov->resize(_buffers.size());
Expand Down
1 change: 1 addition & 0 deletions src/include/buffer.h
Expand Up @@ -528,6 +528,7 @@ namespace buffer CEPH_BUFFER_API {
int read_fd_zero_copy(int fd, size_t len);
int write_file(const char *fn, int mode=0644);
int write_fd(int fd) const;
int write_fd(int fd, uint64_t offset) const;
int write_fd_zero_copy(int fd) const;
void prepare_iov(std::vector<iovec> *piov) const;
uint32_t crc32c(uint32_t crc) const;
Expand Down
3 changes: 3 additions & 0 deletions src/include/config-h.in.cmake
Expand Up @@ -123,6 +123,9 @@
/* Define to 1 if you have the `syncfs' function. */
#cmakedefine HAVE_SYNCFS 1

/* Define to 1 if you have the `pwritev' function. */
#cmakedefine HAVE_PWRITEV 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you want to add

CHECK_FUNCTION_EXISTS(pwritev HAVE_PWRITEV) 

in CMakeLists.txt also?


/* sync_file_range(2) is supported */
#cmakedefine HAVE_SYNC_FILE_RANGE

Expand Down
19 changes: 1 addition & 18 deletions src/os/filestore/FileStore.cc
Expand Up @@ -3188,8 +3188,6 @@ int FileStore::_write(coll_t cid, const ghobject_t& oid,
dout(15) << "write " << cid << "/" << oid << " " << offset << "~" << len << dendl;
int r;

int64_t actual;

FDRef fd;
r = lfn_open(cid, oid, true, &fd);
if (r < 0) {
Expand All @@ -3199,23 +3197,8 @@ int FileStore::_write(coll_t cid, const ghobject_t& oid,
goto out;
}

// seek
actual = ::lseek64(**fd, offset, SEEK_SET);
if (actual < 0) {
r = -errno;
dout(0) << "write lseek64 to " << offset << " failed: " << cpp_strerror(r) << dendl;
lfn_close(fd);
goto out;
}
if (actual != (int64_t)offset) {
dout(0) << "write lseek64 to " << offset << " gave bad offset " << actual << dendl;
r = -EIO;
lfn_close(fd);
goto out;
}

// write
r = bl.write_fd(**fd);
r = bl.write_fd(**fd, offset);
if (r == 0)
r = bl.length();

Expand Down
18 changes: 18 additions & 0 deletions src/test/bufferlist.cc
Expand Up @@ -2143,6 +2143,24 @@ TEST(BufferList, write_fd) {
::unlink(FILENAME);
}

TEST(BufferList, write_fd_offset) {
::unlink(FILENAME);
int fd = ::open(FILENAME, O_WRONLY|O_CREAT|O_TRUNC, 0600);
bufferlist bl;
for (unsigned i = 0; i < IOV_MAX * 2; i++) {
bufferptr ptr("A", 1);
bl.push_back(ptr);
}
uint64_t offset = 200;
EXPECT_EQ(0, bl.write_fd(fd, offset));
::close(fd);
struct stat st;
memset(&st, 0, sizeof(st));
::stat(FILENAME, &st);
EXPECT_EQ(IOV_MAX * 2 + offset, st.st_size);
::unlink(FILENAME);
}

TEST(BufferList, crc32c) {
bufferlist bl;
__u32 crc = 0;
Expand Down