Skip to content

Commit

Permalink
Assume that bucket size only grows
Browse files Browse the repository at this point in the history
  • Loading branch information
lukemartinlogan committed Oct 19, 2022
1 parent 7c9ed21 commit d3e7468
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 68 deletions.
7 changes: 4 additions & 3 deletions adapter/filesystem/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ void Filesystem::Open(AdapterStat &stat, File &f, const std::string &path) {
std::make_shared<hapi::PersistTrait>(path_str, offset_map, false);
stat.st_vbkt->Attach(stat.st_persist.get());
}
_OpenInitStats(f, stat, bucket_exists);
_OpenInitStats(f, stat);
_OpenInitStatsInternal(stat, bucket_exists);
mdm->Create(f, stat);
} else {
LOG(INFO) << "File opened before by adapter" << std::endl;
Expand Down Expand Up @@ -162,7 +163,7 @@ size_t Filesystem::Write(File &f, AdapterStat &stat, const void *ptr,
}
off_t f_offset = off + data_offset;
if (opts.seek_) { stat.st_ptr = f_offset; }
stat.st_size = std::max(stat.st_size, f_offset);
stat.st_size = std::max(stat.st_size, static_cast<size_t>(f_offset));

struct timespec ts;
timespec_get(&ts, TIME_UTC);
Expand Down Expand Up @@ -356,7 +357,7 @@ size_t Filesystem::Read(File &f, AdapterStat &stat, void *ptr,
<< " (stored file size: " << stat.st_size
<< " true file size: " << stdfs::file_size(bkt->GetName())
<< ")" << std::endl;
if (stat.st_ptr >= stat.st_size) {
if (static_cast<size_t>(stat.st_ptr) >= stat.st_size) {
LOG(INFO) << "The current offset: " << stat.st_ptr <<
" is larger than file size: " << stat.st_size << std::endl;
return 0;
Expand Down
29 changes: 26 additions & 3 deletions adapter/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ struct AdapterStat {
mode_t st_mode; /* protection */
uid_t st_uid; /* user ID of owner */
gid_t st_gid; /* group ID of owner */
off_t st_size; /* total size, in bytes */
size_t st_size; /* total size, in bytes */
off_t st_ptr; /* Current ptr of FILE */
blksize_t st_blksize; /* blocksize for blob within bucket */
timespec st_atim; /* time of last access */
Expand Down Expand Up @@ -264,15 +264,38 @@ class Filesystem {
size_t _ReadExistingPartial(BlobPlacementIter &read_iter);
size_t _ReadNew(BlobPlacementIter &read_iter);

/**
 * Adapter-independent part of stat initialization performed on open.
 *
 * When the bucket already exists, st_size is raised to the bucket's total
 * blob size if that is larger; it is never lowered here. In other words,
 * the file size is assumed to only ever grow. A pre-existing bucket may
 * hold content that is not yet in the file (e.g., when using
 * ADAPTER_MODE=SCRATCH), in which case the bucket size is the one to use.
 *
 * When stat.is_append is set, the file pointer is placed at the
 * (possibly updated) end of the file.
 *
 * Open questions remain around multi-tenancy: the outcome is not
 * well-defined if one process opens a file while another process is
 * adding content to it.
 *
 * @param stat the adapter stat to update (st_size and st_ptr may change)
 * @param bucket_exists whether the bucket existed before this open
 * */
void _OpenInitStatsInternal(AdapterStat &stat, bool bucket_exists) {
  // TODO(llogan): This isn't really parallel-safe.
  if (bucket_exists) {
    const size_t blob_bytes = stat.st_bkid->GetTotalBlobSize();
    // Grow-only update: keep whichever of the two sizes is larger.
    if (stat.st_size < blob_bytes) {
      stat.st_size = blob_bytes;
    }
    LOG(INFO) << "Since bucket exists, should reset its size to: "
              << stat.st_size << std::endl;
  }
  if (stat.is_append) {
    // st_ptr is an off_t while st_size is a size_t; convert explicitly.
    stat.st_ptr = static_cast<off_t>(stat.st_size);
  }
}

/*
* The APIs to overload
* */
public:
virtual void _InitFile(File &f) = 0;

private:
virtual void _OpenInitStats(File &f, AdapterStat &stat,
bool bucket_exists) = 0;
virtual void _OpenInitStats(File &f, AdapterStat &stat) = 0;
virtual File _RealOpen(AdapterStat &stat, const std::string &path) = 0;
virtual size_t _RealWrite(const std::string &filename, off_t offset,
size_t size, const u8 *data_ptr,
Expand Down
21 changes: 1 addition & 20 deletions adapter/mpiio/fs_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -528,30 +528,11 @@ void MpiioFS::_InitFile(File &f) {
posix_api->close(fd);*/
}

void MpiioFS::_OpenInitStats(File &f, AdapterStat &stat, bool bucket_exists) {
(void) bucket_exists;
void MpiioFS::_OpenInitStats(File &f, AdapterStat &stat) {
MPI_Offset size = static_cast<MPI_Offset>(stat.st_size);
MPI_File_get_size(f.mpi_fh_, &size);
stat.st_size = size;
// TODO(llogan): This isn't really parallel-safe.
/**
* Here, we assume that when bkt_size is 0, then the bucket was just
* created and the true size of the file being opened is the
* size of the file on the PFS. However, if the user truncated
* the file at runtime, this will now be incorrect. Need a
* better way to determine what the true size of the file
* should be.
* */
if (bucket_exists) {
size_t bkt_size = stat.st_bkid->GetTotalBlobSize();
if (bkt_size > 0) {
stat.st_size = bkt_size;
}
LOG(INFO) << "Since bucket exists, should reset its size to: "
<< stat.st_size << std::endl;
}
if (stat.amode & MPI_MODE_APPEND) {
stat.st_ptr = stat.st_size;
stat.is_append = true;
}
}
Expand Down
2 changes: 1 addition & 1 deletion adapter/mpiio/fs_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ class MpiioFS : public hermes::adapter::fs::Filesystem {
* */

private:
void _OpenInitStats(File &f, AdapterStat &stat, bool bucket_exists) override;
void _OpenInitStats(File &f, AdapterStat &stat) override;
File _RealOpen(AdapterStat &stat, const std::string &path) override;
size_t _RealWrite(const std::string &filename, off_t offset, size_t size,
const u8 *data_ptr,
Expand Down
21 changes: 1 addition & 20 deletions adapter/posix/fs_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ void PosixFS::_InitFile(File &f) {
f.st_ino = st.st_ino;
}

void PosixFS::_OpenInitStats(File &f, AdapterStat &stat, bool bucket_exists) {
(void) bucket_exists;
void PosixFS::_OpenInitStats(File &f, AdapterStat &stat) {
struct stat st;
real_api->__fxstat(_STAT_VER, f.fd_, &st);
stat.st_mode = st.st_mode;
Expand All @@ -52,25 +51,7 @@ void PosixFS::_OpenInitStats(File &f, AdapterStat &stat, bool bucket_exists) {
stat.st_atim = st.st_atim;
stat.st_mtim = st.st_mtim;
stat.st_ctim = st.st_ctim;
// TODO(llogan): This isn't really parallel-safe.
/**
* Here, we assume that when bkt_size is 0, then the bucket was just
* created and the true size of the file being opened is the
* size of the file on the PFS. However, if the user truncated
* the file at runtime, this will now be incorrect. Need a
* better way to determine what the true size of the file
* should be.
* */
if (bucket_exists) {
size_t bkt_size = stat.st_bkid->GetTotalBlobSize();
if (bkt_size > 0) {
stat.st_size = bkt_size;
}
LOG(INFO) << "Since bucket exists, should reset its size to: "
<< stat.st_size << std::endl;
}
if (stat.flags & O_APPEND) {
stat.st_ptr = stat.st_size;
stat.is_append = true;
}
}
Expand Down
2 changes: 1 addition & 1 deletion adapter/posix/fs_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class PosixFS : public hermes::adapter::fs::Filesystem {
void _InitFile(File &f) override;

private:
void _OpenInitStats(File &f, AdapterStat &stat, bool bucket_exists) override;
void _OpenInitStats(File &f, AdapterStat &stat) override;
File _RealOpen(AdapterStat &stat, const std::string &path) override;
size_t _RealWrite(const std::string &filename, off_t offset, size_t size,
const u8 *data_ptr,
Expand Down
20 changes: 1 addition & 19 deletions adapter/stdio/fs_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void StdioFS::_InitFile(File &f) {
f.st_ino = st.st_ino;
}

void StdioFS::_OpenInitStats(File &f, AdapterStat &stat, bool bucket_exists) {
void StdioFS::_OpenInitStats(File &f, AdapterStat &stat) {
struct stat st;
posix_api->__fxstat(_STAT_VER, f.fd_, &st);
stat.st_mode = st.st_mode;
Expand All @@ -50,25 +50,7 @@ void StdioFS::_OpenInitStats(File &f, AdapterStat &stat, bool bucket_exists) {
stat.st_atim = st.st_atim;
stat.st_mtim = st.st_mtim;
stat.st_ctim = st.st_ctim;
// TODO(llogan): This isn't really parallel-safe.
/**
* Here, we assume that when bkt_size is 0, then the bucket was just
* created and the true size of the file being opened is the
* size of the file on the PFS. However, if the user truncated
* the file at runtime, this will now be incorrect. Need a
* better way to determine what the true size of the file
* should be.
* */
if (bucket_exists) {
size_t bkt_size = stat.st_bkid->GetTotalBlobSize();
if (bkt_size > 0) {
stat.st_size = bkt_size;
}
LOG(INFO) << "Since bucket exists, should reset its size to: "
<< stat.st_size << std::endl;
}
if (stat.mode_str.find('a') != std::string::npos) {
stat.st_ptr = stat.st_size;
stat.is_append = true;
}
}
Expand Down
2 changes: 1 addition & 1 deletion adapter/stdio/fs_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class StdioFS : public hermes::adapter::fs::Filesystem {
void _InitFile(File &f) override;

private:
void _OpenInitStats(File &f, AdapterStat &stat, bool bucket_exists) override;
void _OpenInitStats(File &f, AdapterStat &stat) override;
File _RealOpen(AdapterStat &stat, const std::string &path) override;
size_t _RealWrite(const std::string &filename, off_t offset, size_t size,
const u8 *data_ptr,
Expand Down

0 comments on commit d3e7468

Please sign in to comment.