From e6550a61663dd7b6e0002e5482e1b4d92a09f1a8 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Wed, 31 Aug 2022 16:00:09 -0500 Subject: [PATCH 01/15] Documenting Bucket.h --- doc/Doxyfile.in | 10 +++-- src/api/bucket.h | 56 +++++++++++++++++++++------- src/buffer_pool_visualizer/README.md | 2 +- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 112ef2593..553a2045b 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -5,7 +5,7 @@ DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = Hermes PROJECT_NUMBER = 0.8.0-beta -PROJECT_BRIEF = "I/O Buffering System" +PROJECT_BRIEF = "Hierarchical Distributed I/O Buffering System" PROJECT_LOGO = OUTPUT_DIRECTORY = CREATE_SUBDIRS = NO @@ -35,7 +35,11 @@ MULTILINE_CPP_IS_BRIEF = NO INHERIT_DOCS = YES SEPARATE_MEMBER_PAGES = NO TAB_SIZE = 4 -ALIASES = + +ALIASES += status="A Status object." +ALIASES += ctx{1}="ctx The Context for this \1." +ALIASES += bool{1}="true if \1, otherwise false." + TCL_SUBST = OPTIMIZE_OUTPUT_FOR_C = NO OPTIMIZE_OUTPUT_JAVA = NO @@ -113,7 +117,7 @@ INPUT_ENCODING = UTF-8 FILE_PATTERNS = *.h \ *.cc RECURSIVE = YES -EXCLUDE = +EXCLUDE = @PROJECT_SOURCE_DIR@/src/stb_ds.h EXCLUDE_SYMLINKS = NO EXCLUDE_PATTERNS = EXCLUDE_SYMBOLS = diff --git a/src/api/bucket.h b/src/api/bucket.h index 384155f4a..260480e91 100644 --- a/src/api/bucket.h +++ b/src/api/bucket.h @@ -29,15 +29,20 @@ namespace hermes { namespace api { +/** \brief A container for Blob%s + * + */ class Bucket { private: + /** The user-facing descriptor of this Bucket. */ std::string name_; + /** The internal descriptor of this Bucket. */ hermes::BucketID id_; public: - /** internal Hermes object owned by Bucket */ + /** The internal Hermes instance within which to create this Bucket. */ std::shared_ptr hermes_; - /** This Bucket's Context. \todo Why does a bucket need a context? */ + /** The api::Context that controls all operations on this Bucket. 
*/ Context ctx_; // TODO(chogan): Think about the Big Three @@ -45,34 +50,57 @@ class Bucket { LOG(INFO) << "Create NULL Bucket " << std::endl; } + /** + * + */ Bucket(const std::string &initial_name, std::shared_ptr const &h, Context ctx = Context()); - /** - * \brief Releases the Bucket, decrementing its reference count + /** \brief Releases the Bucket, decrementing its reference count. * - * This does not free any resources. To remove the Bucket from the - * MetadataManager and free its stored Blobs, see Bucket::Destroy. + * This does not free any resources. To remove the Bucket%'s metadata and free + * its stored Blob%s, see Bucket::Destroy. */ ~Bucket(); - /** Get the name of bucket */ + /** \brief Get the user-facing name of the Bucket. + * + * \return The name of this Bucket. + */ std::string GetName() const; - /** Get the internal ID of the bucket */ + /** \brief Get the internal ID of the bucket. + * + * The ID is the internal representation of the Bucket%'s name. + * + * \return The internal Bucket ID. + */ u64 GetId() const; - /** Returns true if this Bucket has been created but not yet destroyed */ + /** \brief Return true if the Bucket is valid. + * + * A valid Bucket is one that was successfully created, contains metadata + * entries, has a valid ID, and has not been destroyed. An invalid Bucket + * cannot be used. + * + * \return \bool{the Bucket is valid} + */ bool IsValid() const; - /** Returns the total size of all Blobs in this Bucket. */ + /** \brief Return the total size of all Blobs in this Bucket. */ size_t GetTotalBlobSize(); - /** Put a blob in this bucket with context */ + /** Put a blob in this bucket with context + * + * \return \status + */ template Status Put(const std::string &name, const std::vector &data, Context &ctx); - /** Put a blob in this bucket \todo Why isn't this a context-free case? */ + /** \brief Put a blob in this bucket \todo Why isn't this a context-free case? 
+ * + * \return \status + */ template Status Put(const std::string &name, const std::vector &data); @@ -82,9 +110,9 @@ class Bucket { * \param name A blob name * \param data A blob buffer * \param size The number of blob bytes in buffer - * \param ctx A Hermes context + * \param \ctx{Put} * - * \return The return code/status + * \return \status * * \pre The bucket must be valid. * \pre The blob name \p name length (as byte array) must not exceed #kMaxBlobName. diff --git a/src/buffer_pool_visualizer/README.md b/src/buffer_pool_visualizer/README.md index af8bfe779..7cff1e019 100644 --- a/src/buffer_pool_visualizer/README.md +++ b/src/buffer_pool_visualizer/README.md @@ -34,7 +34,7 @@ option `HERMES_DEBUG_HEAP=ON`. ### `BufferPool` mode -![Buffer Pool Visualizer](https://github.com/HDFGroup/hermes/wiki/images/bp_viz.png) +![Buffer Pool Visualizer](https://github.com/HDFGroup/hermes/wiki/images/buffer_pool_visualizer_default.png) ### `MetadataManager` mode From a64b67dce4e49eed7e8049e0b6d1f0474e0d56d2 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Tue, 6 Sep 2022 15:54:43 -0500 Subject: [PATCH 02/15] API documentation progress --- adapter/test/vfd/hermes_vfd_test.cc | 2 +- doc/Doxyfile.in | 2 +- src/api/bucket.cc | 9 + src/api/bucket.h | 283 ++++++++++++++++++++-------- src/hermes_types.h | 3 + 5 files changed, 217 insertions(+), 82 deletions(-) diff --git a/adapter/test/vfd/hermes_vfd_test.cc b/adapter/test/vfd/hermes_vfd_test.cc index faa1ab2ca..e557c8e6d 100644 --- a/adapter/test/vfd/hermes_vfd_test.cc +++ b/adapter/test/vfd/hermes_vfd_test.cc @@ -218,7 +218,7 @@ struct Hdf5Api { } /** - * Create a 1-dimensional dataset named @pdset_name in object @p hid with @p + * Create a 1-dimensional dataset named @p dset_name in object @p hid with @p * nelems elements from the array @p data. 
*/ void MakeDataset(hid_t hid, const std::string &dset_name, const f32 *data, diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 553a2045b..d47fb48b9 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -79,7 +79,7 @@ SHOW_INCLUDE_FILES = YES SHOW_GROUPED_MEMB_INC = NO FORCE_LOCAL_INCLUDES = NO INLINE_INFO = YES -SORT_MEMBER_DOCS = YES +SORT_MEMBER_DOCS = NO SORT_BRIEF_DOCS = NO SORT_MEMBERS_CTORS_1ST = NO SORT_GROUP_NAMES = NO diff --git a/src/api/bucket.cc b/src/api/bucket.cc index 88a766ecc..936dfbb9b 100644 --- a/src/api/bucket.cc +++ b/src/api/bucket.cc @@ -97,6 +97,13 @@ size_t Bucket::GetTotalBlobSize() { return result; } +size_t Bucket::GetBlobSize(const std::string &name, const Context &ctx) { + ScopedTemporaryMemory scratch(&hermes_->trans_arena_); + size_t result = GetBlobSize(scratch, name, ctx); + + return result; +} + size_t Bucket::GetBlobSize(Arena *arena, const std::string &name, const Context &ctx) { (void)ctx; @@ -248,6 +255,8 @@ Status Bucket::GetV(void *user_blob, Predicate pred, Context &ctx) { LOG(INFO) << "Getting blobs by predicate from bucket " << name_ << '\n'; + HERMES_NOT_IMPLEMENTED_YET; + return ret; } diff --git a/src/api/bucket.h b/src/api/bucket.h index 260480e91..1a5ca1406 100644 --- a/src/api/bucket.h +++ b/src/api/bucket.h @@ -45,13 +45,24 @@ class Bucket { /** The api::Context that controls all operations on this Bucket. */ Context ctx_; - // TODO(chogan): Think about the Big Three + // TODO(chogan): Think about copy/move constructor/assignment operators + + /** \brief Default constructor. + * + * Creates the "NULL" Bucket. + */ Bucket() : name_(""), id_{0, 0}, hermes_(nullptr) { LOG(INFO) << "Create NULL Bucket " << std::endl; } - /** + /** \brief Constructor. + * + * Create a Bucket with name \p initial_name, backed by Hermes instance \p h, + * with optional Context \p ctx. * + * \param initial_name The name of this Bucket. + * \param h An initialized Hermes instance. 
+ * \param ctx An optional Context that controls the behavior of this Bucket. */ Bucket(const std::string &initial_name, std::shared_ptr const &h, Context ctx = Context()); @@ -87,159 +98,252 @@ class Bucket { */ bool IsValid() const; - /** \brief Return the total size of all Blobs in this Bucket. */ + /** \brief Return the total size in bytes of all Blob%s in this Bucket. + * + * \return The total size in bytes of all Blob%s in this Bucket. + */ size_t GetTotalBlobSize(); - /** Put a blob in this bucket with context + /** \brief Put a Blob in this bucket. + * + * Uses the Bucket%'s saved Context. + * + * \param name The name of the Blob to put. + * \param data The Blob data. * * \return \status */ template - Status Put(const std::string &name, const std::vector &data, Context &ctx); + Status Put(const std::string &name, const std::vector &data); - /** \brief Put a blob in this bucket \todo Why isn't this a context-free case? + /** \overload * - * \return \status + * \param \ctx{Put} */ template - Status Put(const std::string &name, const std::vector &data); + Status Put(const std::string &name, const std::vector &data, Context &ctx); + /** - * \brief Puts a blob to a bucket + * \brief Put a Blob in this Bucket. * - * \param name A blob name - * \param data A blob buffer - * \param size The number of blob bytes in buffer + * \param name The name of the Blob to Put + * \param data The Blob%'s data. + * \param size The size of the Blob in bytes. * \param \ctx{Put} * * \return \status * - * \pre The bucket must be valid. - * \pre The blob name \p name length (as byte array) must not exceed #kMaxBlobName. - * \pre The blob buffer \p data must not be \c nullptr unless \p size is 0. - * \pre If \p size is positive \p data must not be \c nullptr. + * \pre The Bucket must be valid. + * \pre The length of \p name in bytes must not exceed + * #hermes::api::kMaxBlobNameSize. + * \pre The Blob buffer \p data must not be \c nullptr unless \p size is 0. 
* + * \return \status */ - Status Put(const std::string &name, const u8 *data, size_t size, - const Context &ctx); + Status Put(const std::string &name, const u8 *data, size_t size); /** - * \todo Put + * \overload + * + * \param \ctx{Put} */ - Status Put(const std::string &name, const u8 *data, size_t size); + Status Put(const std::string &name, const u8 *data, size_t size, + const Context &ctx); - /** - * \todo Put + /** \brief Put a vector of Blob%s. + * + * \param names + * \param blobs + * + * \return \status */ template Status Put(const std::vector &names, - const std::vector> &blobs, const Context &ctx); + const std::vector> &blobs); - /** - * \todo Put + /** \overload + * + * \param \ctx{Put} */ template Status Put(const std::vector &names, - const std::vector> &blobs); + const std::vector> &blobs, const Context &ctx); - /** - * \todo PutInternal + /** \brief + * + * \return \status */ template Status PutInternal(const std::vector &names, const std::vector &sizes, const std::vector> &blobs, const Context &ctx); - /** - * \todo PlaceBlobs + /** \brief + * + * \return \status */ template Status PlaceBlobs(std::vector &schemas, const std::vector> &blobs, const std::vector &names, const Context &ctx); - /** Get the size in bytes of the Blob referred to by `name` */ + /** \brief Get the size in bytes of the Blob referred to by `name` + * + * \param name + * \param \ctx{call} + */ + size_t GetBlobSize(const std::string &name, const Context &ctx); + + /** \overload + * + * \param arena An Arena backed by allocated memory. 
+ */ size_t GetBlobSize(Arena *arena, const std::string &name, const Context &ctx); - /** get a blob on this bucket */ - /** - if user_blob.size() == 0 => return the minimum buffer size needed */ - /** - if user_blob.size() > 0 => copy user_blob.size() bytes */ - /** to user_blob and return user_blob.size() */ - /** use provides buffer */ - size_t Get(const std::string &name, Blob& user_blob, const Context &ctx); + /** \brief Get a blob from this Bucket + * - if user_blob.size() == 0 => return the minimum buffer size needed + * - if user_blob.size() > 0 => copy user_blob.size() bytes + * to user_blob and return user_blob.size() + * use provides buffer + */ size_t Get(const std::string &name, Blob& user_blob); - /** - * \brief Retrieve multiple Blobs in one call. + /** \overload + * + * \param ctx{Get} + */ + size_t Get(const std::string &name, Blob& user_blob, const Context &ctx); + + /** \brief Retrieve multiple Blob%s in one call. + * */ std::vector Get(const std::vector &names, std::vector &blobs, const Context &ctx); - /** - * \brief Retrieve a Blob into a user buffer. + /** \brief Retrieve a Blob into a user buffer. + * */ size_t Get(const std::string &name, void *user_blob, size_t blob_size, const Context &ctx); - /** - * \brief Retrieves a blob from the Bucket. The Blob retrieved is the next - * one from the passed blob_index - * - * \pre if user_blob.size() == 0 => return the minimum buffer size needed - * \pre if user_blob.size() > 0 => copy user_blob.size() bytes to user_blob + /** \brief Retrieves a Blob from this Bucket. + * + * The Blob retrieved is the next one from the passed blob_index. 
+ * + * \pre if user_blob.size() == 0 => return the minimum buffer size needed + * \pre if user_blob.size() > 0 => copy user_blob.size() bytes to user_blob * and return user_blob.size() - */ - size_t GetNext(u64 blob_index, Blob& user_blob, const Context &ctx); + */ size_t GetNext(u64 blob_index, Blob& user_blob); - /** - * \brief Retrieves a blob from the Bucket into a user buffer. The Blob - * retrieved is the next one from the passed blob_index - */ + /** \overload + * + * \param \ctx{call} + */ + size_t GetNext(u64 blob_index, Blob& user_blob, const Context &ctx); + + /** \brief Retrieves a blob from the Bucket into a user buffer. + * + * The Blob retrieved is the next one from the passed blob_index. + */ size_t GetNext(u64 blob_index, void *user_blob, size_t blob_size, const Context &ctx); - /** - * \brief Retrieves multiple blobs from the Bucket. The Blobs retrieved are - * the next ones from the passed blob_index - */ + /** \brief Retrieves multiple blobs from the Bucket. + * + * The Blobs retrieved are the next ones from the passed blob_index + */ std::vector GetNext(u64 blob_index, u64 count, std::vector &blobs, const Context &ctx); - /** get blob(s) on this bucket according to predicate */ - /** use provides buffer */ + + /** \brief Get Blob%(s) from this Bucket according to a predicate. + * + * \todo Not implemented yet. + * + * \return \status + */ template Status GetV(void *user_blob, Predicate pred, Context &ctx); - /** delete a blob from this bucket */ - Status DeleteBlob(const std::string &name, const Context &ctx); + /** \brief Delete a Blob from this Bucket. + * + * \return \status + */ Status DeleteBlob(const std::string &name); - /** rename a blob on this bucket */ + /** \overload + * + * \param \ctx{call} + */ + Status DeleteBlob(const std::string &name, const Context &ctx); + + /** \brief Rename a Blob in this Bucket. 
+ * + * \return \status + */ + Status RenameBlob(const std::string &old_name, const std::string &new_name); + + /** \overload + * + * \param \ctx{call} + */ Status RenameBlob(const std::string &old_name, const std::string &new_name, const Context &ctx); - Status RenameBlob(const std::string &old_name, const std::string &new_name); - /** Returns true if the Bucket contains a Blob called `name` */ + /** \brief Returns true if the Bucket contains a Blob called \p name. + * + * \return \bool{the Blob \p name is in this Bucket} + */ bool ContainsBlob(const std::string &name); - /** Returns true if the Blob called `name` in this bucket is in swap space */ + /** \brief Return true if the Blob \p name is in swap space. + * + * \return \bool{the Blob called \p name in this Bucket is in swap space} + * + */ bool BlobIsInSwap(const std::string &name); - /** get a list of blob names filtered by pred */ + /** \brief Get a list of blob names filtered by \p pred. + * + */ template std::vector GetBlobNames(Predicate pred, Context &ctx); - /** rename this bucket */ - Status Rename(const std::string& new_name, const Context &ctx); + /** \brief Rename this Bucket. + * + * \param new_name A new name for the Bucket. + * + * \pre The length of \p new_name in bytes should be less than + * #kMaxBlobNameSize. + * + * \return \status + */ Status Rename(const std::string& new_name); - /** Save this bucket's blobs to persistent storage. + /** \overload * - * The blobs are written in the same order in which they are `Put`. */ - Status Persist(const std::string &file_name, const Context &ctx); + * \param \ctx{call}. + */ + Status Rename(const std::string& new_name, const Context &ctx); + + /** \brief Save this Bucket%'s Blob%s to persistent storage. + * + * The blobs are written in the same order in which they are `Put`. + * + * \param file_name The name of the file to persist the Blob%s to. 
+ * + * \return \status + */ Status Persist(const std::string &file_name); + /** \overload + * + * \param \ctx{call}. + */ + Status Persist(const std::string &file_name, const Context &ctx); + /** - * \brief Allign \p blob_name's access speed to its importance. + * \brief Align blob_name's access speed to its importance. * * \param blob_name The name of the Blob to organize. * @@ -256,19 +360,38 @@ class Bucket { void OrganizeBlob(const std::string &blob_name, f32 epsilon, f32 custom_importance = -1.f); - /** - * \brief Release this Bucket + /** \brief Release this Bucket. * - * This simply decrements the refcount to this Bucket in the Hermes metadata. - * To free resources associated with this Bucket, call Bucket::Destroy. + * This function simply decrements the refcount to this Bucket in the Hermes + * metadata. To free resources associated with this Bucket, call + * Bucket::Destroy. + * + * \return \status */ - Status Release(const Context &ctx); Status Release(); - /** destroy this bucket */ - /** ctx controls "aggressiveness */ - Status Destroy(const Context &ctx); + /** \overload + * + * \param \ctx{call} + */ + Status Release(const Context &ctx); + + /** \brief Destroy this Bucket. + * + * Deletes all metadata and Blob%s associated with this Bucket. + * + * \pre The Bucket must have a reference count of 1. Other ranks must first + * Bucket::Close the Bucket. + * + * \return \status */ Status Destroy(); + + /** \overload + * + * \param \ctx{call}. + */ + Status Destroy(const Context &ctx); }; template diff --git a/src/hermes_types.h b/src/hermes_types.h index 60290b22c..c77e5ce6c 100644 --- a/src/hermes_types.h +++ b/src/hermes_types.h @@ -321,6 +321,9 @@ union BucketID { // BucketID into the Blob name. See MakeInternalBlobName() for a description of // why we need double the bytes of a BucketID. constexpr int kBucketIdStringSize = sizeof(BucketID) * 2; +/** + * The maximum size in bytes allowed for Blob names. 
+ */ constexpr int kMaxBlobNameSize = 64 - kBucketIdStringSize; union VBucketID { From 33645490d889dc462dccd85adb4d3ab37c889b70 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Wed, 7 Sep 2022 08:08:47 -0500 Subject: [PATCH 03/15] Finished documenting bucket.h --- src/api/bucket.cc | 2 + src/api/bucket.h | 115 +++++++++++++++++++++++++++++--------------- src/hermes_status.h | 3 ++ src/hermes_types.h | 15 ++++++ 4 files changed, 95 insertions(+), 40 deletions(-) diff --git a/src/api/bucket.cc b/src/api/bucket.cc index 936dfbb9b..f358f51a2 100644 --- a/src/api/bucket.cc +++ b/src/api/bucket.cc @@ -324,6 +324,8 @@ std::vector Bucket::GetBlobNames(Predicate pred, LOG(INFO) << "Getting blob names by predicate from bucket " << name_ << '\n'; + HERMES_NOT_IMPLEMENTED_YET; + return std::vector(); } diff --git a/src/api/bucket.h b/src/api/bucket.h index 1a5ca1406..6a95e40c5 100644 --- a/src/api/bucket.h +++ b/src/api/bucket.h @@ -25,6 +25,8 @@ #include "metadata_management.h" #include "utils.h" +/** \file bucket.h */ + namespace hermes { namespace api { @@ -104,7 +106,7 @@ class Bucket { */ size_t GetTotalBlobSize(); - /** \brief Put a Blob in this bucket. + /** \brief Put a Blob in this Bucket. * * Uses the Bucket%'s saved Context. * @@ -153,8 +155,11 @@ class Bucket { /** \brief Put a vector of Blob%s. * - * \param names - * \param blobs + * \param names 1 or more names, each of which is no longer than + * kMaxBlobNameSize bytes. + * \param blobs 1 or more Blob%s. + * + * \pre The length of \p names and \p blobs should be equal. 
* * \return \status */ @@ -170,27 +175,9 @@ class Bucket { Status Put(const std::vector &names, const std::vector> &blobs, const Context &ctx); - /** \brief - * - * \return \status - */ - template - Status PutInternal(const std::vector &names, - const std::vector &sizes, - const std::vector> &blobs, - const Context &ctx); - /** \brief - * - * \return \status - */ - template - Status PlaceBlobs(std::vector &schemas, - const std::vector> &blobs, - const std::vector &names, const Context &ctx); - - /** \brief Get the size in bytes of the Blob referred to by `name` + /** \brief Get the size in bytes of the Blob referred to by \p name. * - * \param name + * \param name The name of the Blob to query. * \param \ctx{call} */ size_t GetBlobSize(const std::string &name, const Context &ctx); @@ -201,38 +188,55 @@ class Bucket { */ size_t GetBlobSize(Arena *arena, const std::string &name, const Context &ctx); - /** \brief Get a blob from this Bucket - * - if user_blob.size() == 0 => return the minimum buffer size needed - * - if user_blob.size() > 0 => copy user_blob.size() bytes - * to user_blob and return user_blob.size() - * use provides buffer + /** \brief Get a blob from this Bucket. + * + * If the size of \p user_blob is 0, return the minimum buffer size needed + * to contain the Blob \p name, otherwise copy \p user_blob.size() bytes to \p + * user_blob and return the number of bytes copied. + * + * \param name The name of the Blob to get. + * \param user_blob User-provided storage for the retrieved Blob. + * + * \return The size in bytes of the Blob. */ size_t Get(const std::string &name, Blob& user_blob); /** \overload * - * \param ctx{Get} + * \param \ctx{Get} */ size_t Get(const std::string &name, Blob& user_blob, const Context &ctx); /** \brief Retrieve multiple Blob%s in one call. + * + * \param names A list of names of the Blob%s to get. + * \param blobs User-provided storage for the retrieved Blob%s. 
+ * \param \ctx{Get} + * + * \return The sizes in bytes of the Blob%s. * */ std::vector Get(const std::vector &names, std::vector &blobs, const Context &ctx); - /** \brief Retrieve a Blob into a user buffer. + /** \overload * */ size_t Get(const std::string &name, void *user_blob, size_t blob_size, const Context &ctx); - /** \brief Retrieves a Blob from this Bucket. + + /** \brief Given an ordering of Blob%s, retrieves the Blob at index \p + * blob_index + 1. + * + * By default Blob%s are arranged in the order in which they were Put. If + * user_blob.size() == 0, return the minimum buffer size needed. If + * user_blob.size() > 0, copy user_blob.size() bytes to user_blob and return + * user_blob.size() * - * The Blob retrieved is the next one from the passed blob_index. + * \param blob_index The starting index. + * \param user_blob User-provided memory for the Blob. * - * \pre if user_blob.size() == 0 => return the minimum buffer size needed - * \pre if user_blob.size() > 0 => copy user_blob.size() bytes to user_blob - * and return user_blob.size() + * \return The size in bytes of the retrieved Blob. */ size_t GetNext(u64 blob_index, Blob& user_blob); @@ -242,13 +246,14 @@ class Bucket { */ size_t GetNext(u64 blob_index, Blob& user_blob, const Context &ctx); - /** \brief Retrieves a blob from the Bucket into a user buffer. + /** \overload * - * The Blob retrieved is the next one from the passed blob_index. + * \param \ctx{call} */ size_t GetNext(u64 blob_index, void *user_blob, size_t blob_size, const Context &ctx); + // TODO(chogan): /** \brief Retrieves multiple blobs from the Bucket. * * The Blobs retrieved are the next ones from the passed blob_index @@ -266,6 +271,8 @@ class Bucket { Status GetV(void *user_blob, Predicate pred, Context &ctx); /** \brief Delete a Blob from this Bucket. + * + * \param name The name of the Blob to delete. 
* * \return \status */ @@ -278,6 +285,11 @@ class Bucket { Status DeleteBlob(const std::string &name, const Context &ctx); /** \brief Rename a Blob in this Bucket. + * + * \param old_name The Blob to rename. + * \param new_name The desired new name of the Blob. + * + * \pre The size in bytes of \p new_name must not exceed kMaxBlobNameSize. * * \return \status */ @@ -291,6 +303,8 @@ class Bucket { const Context &ctx); /** \brief Returns true if the Bucket contains a Blob called \p name. + * + * \param name The name of the Blob to check. * * \return \bool{the Blob \p name is in this Bucket} */ @@ -298,13 +312,15 @@ class Bucket { /** \brief Return true if the Blob \p name is in swap space. * - * \return \bool{the Blob called \p name in this Bucket is in swap space} + * \param name The name of the Blob to check. * + * \return \bool{the Blob called \p name in this Bucket is in swap space} */ bool BlobIsInSwap(const std::string &name); /** \brief Get a list of blob names filtered by \p pred. * + * \todo Not implemented yet. */ template std::vector GetBlobNames(Predicate pred, Context &ctx); @@ -314,7 +330,7 @@ class Bucket { * \param new_name A new name for the Bucket. * * \pre The length of \p new_name in bytes should be less than - * #kMaxBlobNameSize. + * #kMaxBucketNameSize. * * \return \status */ @@ -328,7 +344,7 @@ class Bucket { /** \brief Save this Bucket%'s Blob%s to persistent storage. * - * The blobs are written in the same order in which they are `Put`. + * The blobs are written in the same order in which they were \p Put. * * \param file_name The name of the file to persist the Blob%s to. * @@ -392,6 +408,25 @@ class Bucket { * \param \ctx{call}. */ Status Destroy(const Context &ctx); + + private: + /** \brief Internal version of Put, called by all overloads. 
+ * + * \return \status + */ + template + Status PutInternal(const std::vector &names, + const std::vector &sizes, + const std::vector> &blobs, + const Context &ctx); + /** \brief Low-level version of Put. + * + * \return \status + */ + template + Status PlaceBlobs(std::vector &schemas, + const std::vector> &blobs, + const std::vector &names, const Context &ctx); }; template diff --git a/src/hermes_status.h b/src/hermes_status.h index 339a3073f..dc723d135 100644 --- a/src/hermes_status.h +++ b/src/hermes_status.h @@ -14,6 +14,9 @@ #define HERMES_STATUS_H_ #include + +/** \file hermes_status.h */ + namespace hermes { #define RETURN_CODES(X) \ diff --git a/src/hermes_types.h b/src/hermes_types.h index c77e5ce6c..1dd4c1255 100644 --- a/src/hermes_types.h +++ b/src/hermes_types.h @@ -24,10 +24,18 @@ #include "hermes_version.h" +/** + * \file hermes_types.h + * Types used in Hermes. + */ + #define KILOBYTES(n) (((size_t)n) * 1024) #define MEGABYTES(n) (((size_t)n) * 1024 * 1024) #define GIGABYTES(n) (((size_t)n) * 1024UL * 1024UL * 1024UL) +/** + * \namespace hermes + */ namespace hermes { typedef uint8_t u8; @@ -49,7 +57,14 @@ struct ChunkedIdList { u32 capacity; }; +/** + * \namespace api + */ namespace api { + +/** + * A Blob is simply an uninterpreted vector of bytes. + */ typedef std::vector Blob; /** Supported data placement policies */ From 8a5df705a8c6a7b942059640df1cad333096b562 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Wed, 7 Sep 2022 08:09:51 -0500 Subject: [PATCH 04/15] Remove unused api/id.h --- src/api/id.h | 50 -------------------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 src/api/id.h diff --git a/src/api/id.h b/src/api/id.h deleted file mode 100644 index 4c24c5fb8..000000000 --- a/src/api/id.h +++ /dev/null @@ -1,50 +0,0 @@ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Distributed under BSD 3-Clause license. * - * Copyright by The HDF Group. 
* - * Copyright by the Illinois Institute of Technology. * - * All rights reserved. * - * * - * This file is part of Hermes. The full Hermes copyright notice, including * - * terms governing use, modification, and redistribution, is contained in * - * the COPYING file, which can be found at the top directory. If you do not * - * have access to the file, you may request a copy from help@hdfgroup.org. * - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#ifndef ID_H_ -#define ID_H_ - -// See https://www.ilikebigbits.com/2014_05_06_type_safe_handles.html - -namespace hermes { - -namespace api { - -/** ID class template */ -template -class ID { - private: - T m_val_; - - public: - /** Returns the invalid ID */ - static ID Invalid() { return ID(); } - - /** Defaults to ID::invalid() */ - ID() : m_val_(default_value) { } - - /** Explicit constructor */ - explicit ID(T val) : m_val_(val) { } - - /** Explicit conversion to get back the T value */ - explicit operator T() const { return m_val_; } - - /** Compare IDs for equality */ - friend bool operator==(ID a, ID b) { return a.m_val == b.m_val; } - /** Compare IDs for inequality */ - friend bool operator!=(ID a, ID b) { return a.m_val != b.m_val; } -}; - -} // namespace api -} // namespace hermes - -#endif // ID_H_ From 7767138beb801e9b4cacb662d938ec993a6d39b7 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Wed, 7 Sep 2022 08:13:36 -0500 Subject: [PATCH 05/15] Fix compilation --- src/api/hermes.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/api/hermes.h b/src/api/hermes.h index 982cb21cd..6102448b5 100644 --- a/src/api/hermes.h +++ b/src/api/hermes.h @@ -34,17 +34,19 @@ #include "buffer_pool.h" #include "metadata_management.h" #include "rpc.h" -#include "id.h" + +/** \file hermes.h */ namespace hermes { namespace api { -/** Return the (semantic versioning compatible) version of Hermes in the form - * MAJOR.MINOR.PATCH +/** \brief Return the (semantic 
versioning compatible) version of Hermes. + * + * \return A string in the form MAJOR.MINOR.PATCH */ std::string GetVersion(); -/** Hermes node state */ +/** Class representing an instance of Hermes. */ class Hermes { public: std::set bucket_list_; From a24f5df2480d3ee3660bc65f2628d2306f2afab1 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Wed, 7 Sep 2022 19:11:42 -0500 Subject: [PATCH 06/15] WIP: documenting hermes.h --- adapter/mpiio/datastructures.h | 2 +- adapter/posix/datastructures.h | 2 +- adapter/stdio/datastructures.h | 2 +- src/api/hermes.h | 110 +++++++++++++++++++++++---------- 4 files changed, 79 insertions(+), 37 deletions(-) diff --git a/adapter/mpiio/datastructures.h b/adapter/mpiio/datastructures.h index 212d3b896..f84340d60 100644 --- a/adapter/mpiio/datastructures.h +++ b/adapter/mpiio/datastructures.h @@ -17,7 +17,7 @@ * Standard header */ #include - +#include #include /** diff --git a/adapter/posix/datastructures.h b/adapter/posix/datastructures.h index 7b809c126..11d177c2d 100644 --- a/adapter/posix/datastructures.h +++ b/adapter/posix/datastructures.h @@ -17,7 +17,7 @@ * Standard header */ #include - +#include #include /** diff --git a/adapter/stdio/datastructures.h b/adapter/stdio/datastructures.h index 987e1f73e..dae5b327b 100644 --- a/adapter/stdio/datastructures.h +++ b/adapter/stdio/datastructures.h @@ -17,7 +17,7 @@ * Standard header */ #include - +#include #include /** diff --git a/src/api/hermes.h b/src/api/hermes.h index 6102448b5..86ed39c3b 100644 --- a/src/api/hermes.h +++ b/src/api/hermes.h @@ -24,9 +24,6 @@ #include #include -#include -#include -#include #include @@ -46,59 +43,104 @@ namespace api { */ std::string GetVersion(); -/** Class representing an instance of Hermes. */ +/** Class representing an instance of a Hermes buffering system. 
*/ class Hermes { public: - std::set bucket_list_; - std::set vbucket_list_; + /** \bool{Hermes is initialized} */ + bool is_initialized; // TODO(chogan): Temporarily public to facilitate iterative development. hermes::SharedMemoryContext context_; hermes::CommunicationContext comm_; hermes::RpcContext rpc_; hermes::Arena trans_arena_; + /** The name of the shared memory segment in which all Hermes data is + * stored. + */ std::string shmem_name_; + /** The name of the primary RPC server. */ std::string rpc_server_name_; - bool is_initialized; - - /** if true will do more checks, warnings, expect slower code */ - const bool debug_mode_ = true; Hermes() {} explicit Hermes(SharedMemoryContext context) : context_(context) {} - /** Display the list of buckets in this node */ - void Display_bucket() { - for (auto it = bucket_list_.begin(); it != bucket_list_.end(); ++it) - std::cout << *it << '\t'; - std::cout << '\n'; - } - - /** Display the list of vbuckets in this node */ - void Display_vbucket() { - for (auto it = vbucket_list_.begin(); it != vbucket_list_.end(); ++it) - std::cout << *it << '\t'; - std::cout << '\n'; - } - - /** Returns whether we are running on an application core. */ + /** \brief Return \bool{this rank is an application core} + * + * An application core is a core or rank on which user code runs as opposed to + * the Hermes core (or rank) which only runs Hermes services. + * + * \return \bool{this rank is an application core} + */ bool IsApplicationCore(); - /** Returns whether we are the first MPI rank on a given node */ + + /** \brief Returns \bool{this is the first MPI rank on this node} + * + * Hermes assigns numeric IDs to each rank. The first rank on the node is the + * lowest ID on that node. + * + * \return \bool{this is the first MPI rank on this node} + */ bool IsFirstRankOnNode(); - /** A barrier across all application processes. */ + + /** \brief A barrier across all application processes. 
+ * + * Like MPI_Barrier but only involves application ranks. + */ void AppBarrier(); - /** Returns the rank of this process */ + + /** \brief Returns the rank of this process. + * + * Hermes assigns each application core a unique rank. + * + * \return The rank of this process. + */ int GetProcessRank(); - /** Return the Node ID of this process */ + + /** \brief Return ID of the node this process is running on. + * + * Hermes assigns each node a numeric ID. + * + * \return The node's ID. + */ int GetNodeId(); - /** Returns the total number of application processes */ + + /** \brief Returns the total number of application processes. + * + * Does not count Hermes processes. + * + * \return The number of application processes. + */ int GetNumProcesses(); - /** Get an application communicator handle */ + + /** \brief Get an application communicator handle. + * + * The handle can be cast to the appropriate type for the communication + * backend and used in the backend's API calls. For example, when using the + * MPI communication backend (the default), this function returns a pointer to + * an MPI_Comm object, which can then be used in any MPI call. + * + * \return A void pointer to a communicator handle. + */ void *GetAppCommunicator(); - /** \todo Hermes::Finalize */ + + /** \brief Shutdown Hermes. + * + * This should be called by every process (application and Hermes cores) + * before shutting down the communication backend (e.g., MPI_Finalize). + * + * \param force_rpc_shutdown This should be \c true if Hermes was initialized + * as a daemon. + */ void Finalize(bool force_rpc_shutdown = false); - /** \todo Hermes::FinalizeClient */ + + /** \brief Shutdown application cores. + * + * + * + * \param stop_daemon By default this function will stop the daemon this + * client is connected to. Passing \c false here will keep it alive. 
+ */ void FinalizeClient(bool stop_daemon = true); /** \todo Hermes::RemoteFinalize */ void RemoteFinalize(); @@ -108,7 +150,7 @@ class Hermes { /** Check if a given bucket contains a blob. */ bool BucketContainsBlob(const std::string &bucket_name, const std::string &blob_name); - /** Returns true if @p bucket_name exists in this Hermes instance. */ + /** Returns true if \p bucket_name exists in this Hermes instance. */ bool BucketExists(const std::string &bucket_name); }; From 440bf2fb62d599fb0c035eba7d3c6a68cb170afd Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Thu, 8 Sep 2022 07:31:18 -0500 Subject: [PATCH 07/15] Remove id.h from installation targets --- src/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 948ba394c..552b7cc03 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -84,7 +84,6 @@ set(HERMES_EXPORTED_LIBS hermes ${HERMES_EXPORTED_LIBS}) set(HERMES_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/api/bucket.h ${CMAKE_CURRENT_SOURCE_DIR}/api/hermes.h - ${CMAKE_CURRENT_SOURCE_DIR}/api/id.h ${CMAKE_CURRENT_SOURCE_DIR}/api/vbucket.h ${CMAKE_CURRENT_SOURCE_DIR}/buffer_pool.h ${CMAKE_CURRENT_SOURCE_DIR}/buffer_pool_internal.h From 322cd94b9a49e62609a0880f372de8d47cd0fbbd Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Thu, 8 Sep 2022 16:03:39 -0500 Subject: [PATCH 08/15] Finished documenting hermes.h --- adapter/stdio/datastructures.h | 1 + src/api/hermes.cc | 2 + src/api/hermes.h | 91 +++++++++++++++++++++++++++++----- 3 files changed, 81 insertions(+), 13 deletions(-) diff --git a/adapter/stdio/datastructures.h b/adapter/stdio/datastructures.h index dae5b327b..c25707422 100644 --- a/adapter/stdio/datastructures.h +++ b/adapter/stdio/datastructures.h @@ -28,6 +28,7 @@ * Internal header */ #include +#include #include #include #include diff --git a/src/api/hermes.cc b/src/api/hermes.cc index f8f2dceeb..6ff7bcbb5 100644 --- a/src/api/hermes.cc +++ b/src/api/hermes.cc @@ -64,6 +64,8 @@ Status 
TransferBlob(const Bucket &src_bkt, LOG(INFO) << "Transferring Blob from " << src_blob_name << " to " << dst_blob_name << '\n'; + HERMES_NOT_IMPLEMENTED_YET; + return ret; } diff --git a/src/api/hermes.h b/src/api/hermes.h index 86ed39c3b..9767cc0bb 100644 --- a/src/api/hermes.h +++ b/src/api/hermes.h @@ -136,26 +136,46 @@ class Hermes { /** \brief Shutdown application cores. * - * + * To be called from application cores that were started separately from a + * Hermes daemon. Normally this is called from adapters. * * \param stop_daemon By default this function will stop the daemon this * client is connected to. Passing \c false here will keep it alive. */ void FinalizeClient(bool stop_daemon = true); - /** \todo Hermes::RemoteFinalize */ + + /** \todo Is this still necessary? + * + */ void RemoteFinalize(); - /** \todo Hermes::RunDaemon */ + + /** \brief Starts a Hermes daemon. + * + * Starts all Hermes services, then waits on the main thread to be finalized. + * + * \pre The Hermes instance must be initialized with InitHermesDaemon. + */ void RunDaemon(); - /** Check if a given bucket contains a blob. */ + /** \brief Check if a given Bucket contains a Blob. + * + * \param bucket_name The name of the Bucket to check. + * \param blob_name The name of the Blob to check. + * + * \return \bool{the bucket \p bucket_name contains the Blob \p blob_name} + */ bool BucketContainsBlob(const std::string &bucket_name, const std::string &blob_name); - /** Returns true if \p bucket_name exists in this Hermes instance. */ + + /** \brief Returns true if \p bucket_name exists in this Hermes instance. + * + * \param bucket_name The name of the Bucket to check. 
+ * + * \return \bool{\p bucket_name exists in this Hermes instance} + */ bool BucketExists(const std::string &bucket_name); }; -class VBucket; - class Bucket; /** Renames a bucket referred to by name only */ @@ -163,28 +183,73 @@ Status RenameBucket(const std::string &old_name, const std::string &new_name, Context &ctx); -/** Transfers a blob between buckets */ +/** \todo Not implemented yet. */ Status TransferBlob(const Bucket &src_bkt, const std::string &src_blob_name, Bucket &dst_bkt, const std::string &dst_blob_name, Context &ctx); -/** \todo InitHermes */ +/** \brief Initialize an instance of Hermes. + * + * \param config_file The (relative or absolute) path to a hermes configuration + * file + * \param is_daemon \c true if initializing this Hermes instance as a daemon. + * \param is_adapter \c true if initializing this Hermes instance as an adapter, + * or client to an existing daemon. + * + * \pre Only one of \p is_daemon and \p is_adapter can be \c true. + * + * \return An initialized Hermes instance. + */ std::shared_ptr InitHermes(const char *config_file = NULL, bool is_daemon = false, bool is_adapter = false); } // namespace api -/** \todo InitHermes */ +/** \overload + * + * Allows programmatically generating configurations. + * + * \param config A valid Config. + * + * \return An initialized Hermes instance. + */ std::shared_ptr InitHermes(Config *config, bool is_daemon = false, bool is_adapter = false); -/** \todo InitHermesDaemon */ + +/** \brief Initialize a Hermes instance as a daemon. + * + * A Hermes daemon is one or more processes (one per node) that handle all + * Hermes background services. This includes RPC servers, thread pools, buffer + * organization, and SystemViewState updates. A daemon is necessary in workflows + * that involve 2 or more applications sharing buffered data. Without a daemon, + * (i.e., co-deploying Hermes services with an application) the lifetime of + * Hermes is tied to the app.
+ * + * \param config_file The (relative or absolute) path to a hermes configuration + * file + * + * \return An initialized Hermes instance. + */ std::shared_ptr InitHermesDaemon(char *config_file = NULL); -/** \todo InitHermesDaemon */ + +/** \overload + * + * \param config A valid Config. + */ std::shared_ptr InitHermesDaemon(Config *config); -/** \todo InitHermesClient */ + +/** \brief Initialize a Hermes instance as a client or adapter. + * + * \param config_file The (relative or absolute) path to a hermes configuration + * file + * + * \pre An existing Hermes daemon must already be running. + * + * \return An initialized Hermes instance. + */ std::shared_ptr InitHermesClient(const char *config_file = NULL); } // namespace hermes From 8091dbe749a25d69ace0510f6d8055ef4644ab47 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Thu, 8 Sep 2022 16:24:52 -0500 Subject: [PATCH 09/15] Documenting vbucket.h --- src/api/vbucket.cc | 25 ++++++++++ src/api/vbucket.h | 113 ++++++++++++++++++++++----------------------- 2 files changed, 81 insertions(+), 57 deletions(-) diff --git a/src/api/vbucket.cc b/src/api/vbucket.cc index f567d7c2d..ec9e6c6da 100644 --- a/src/api/vbucket.cc +++ b/src/api/vbucket.cc @@ -23,6 +23,31 @@ namespace hermes { namespace api { +VBucket::VBucket(std::string initial_name, std::shared_ptr const &h, + Context ctx = Context()) + : name_(initial_name), + id_({{0, 0}}), + attached_traits_(), + hermes_(h), + ctx_(ctx) { + if (IsVBucketNameTooLong(name_)) { + id_.as_int = 0; + throw std::length_error("VBucket name exceeds maximum size of " + + std::to_string(kMaxVBucketNameSize)); + } else { + id_ = GetOrCreateVBucketId(&hermes_->context_, &hermes_->rpc_, name_); + if (!IsValid()) { + throw std::runtime_error("Could not open or create VBucket"); + } + } +} + +VBucket::~VBucket() { + if (IsValid()) { + Release(); + } +} + bool VBucket::IsValid() const { return !IsNullVBucketId(id_); } void VBucket::WaitForBackgroundFlush() { diff --git a/src/api/vbucket.h 
b/src/api/vbucket.h index fd963ab13..45ac4f57d 100644 --- a/src/api/vbucket.h +++ b/src/api/vbucket.h @@ -28,8 +28,8 @@ namespace hermes { namespace api { /** - * Virtual buckets (vbuckets) capture relationships between blobs - * across bucket boundaries. + * Virtual buckets (VBucket%s) capture relationships between Blob%s + * across Bucket boundaries. */ class VBucket { private: @@ -45,31 +45,20 @@ class VBucket { Context ctx_; public: + /** + * + */ VBucket(std::string initial_name, std::shared_ptr const &h, - Context ctx = Context()) - : name_(initial_name), - id_({{0, 0}}), - attached_traits_(), - hermes_(h), - ctx_(ctx) { - if (IsVBucketNameTooLong(name_)) { - id_.as_int = 0; - throw std::length_error("VBucket name exceeds maximum size of " + - std::to_string(kMaxVBucketNameSize)); - } else { - id_ = GetOrCreateVBucketId(&hermes_->context_, &hermes_->rpc_, name_); - if (!IsValid()) { - throw std::runtime_error("Could not open or create VBucket"); - } - } - } - - ~VBucket() { - if (IsValid()) { - Release(); - } - } + Context ctx = Context()); + /** + * + */ + ~VBucket(); + + /** + * + */ bool IsValid() const; /** get the name of vbucket */ @@ -88,11 +77,11 @@ class VBucket { * Blobs. Additional calls the Trait::OnLinkFn function on the Blob for each * attached Trait. * - * @param blob_name The name of the Blob to link. - * @param bucket_name The name of the Bucket containing the Blob to link. - * @param ctx Currently unused. + * \param blob_name The name of the Blob to link. + * \param bucket_name The name of the Bucket containing the Blob to link. + * \param ctx Currently unused. * - * @return A Status. + * \return \status */ Status Link(std::string blob_name, std::string bucket_name, Context &ctx); /** \todo Link */ @@ -101,10 +90,10 @@ class VBucket { /** * Unlink a Blob from this VBucket. * - * @param blob_name The name of the Blob to unlink. - * @param bucket_name The name of the Bucket containing the Blob to unlink. 
+ * \param blob_name The name of the Blob to unlink. + * \param bucket_name The name of the Bucket containing the Blob to unlink. * - * @return A Status. + * \return \status */ Status Unlink(std::string blob_name, std::string bucket_name, Context &ctx); /** \todo Unlink */ @@ -128,39 +117,46 @@ class VBucket { /** could return iterator */ std::vector GetLinks(Context &ctx); - /** - * Attach a trait to this VBucket. + /** \brie Attach a Trait to this VBucket. * - * Calls the Trait::onAttachFn function of @p trait on each Blob that's linked + * Calls the Trait::onAttachFn function of \p trait on each Blob that's linked * to this VBucket. * - * @param trait The Trait to attach. - * @param ctx Currently unused. + * \param trait The Trait to attach. * - * @return A Status. + * \return \status */ - Status Attach(Trait *trait, Context &ctx); Status Attach(Trait *trait); - /** detach a trait to this vbucket */ - Status Detach(Trait *trait, Context &ctx); + /** \overload + * + * \param ctx Currently unused. + */ + Status Attach(Trait *trait, Context &ctx); + + /** \brief Detach a trait from this VBucket. + * + */ Status Detach(Trait *trait); + /** \overload + * + */ + Status Detach(Trait *trait, Context &ctx); + /** retrieves the subset of attached traits satisfying pred */ template std::vector GetTraits(Predicate pred, Context &ctx); - /** - * Get's an attached Trait that matches @p type. + /** \brief Get's an attached Trait that matches \p type. * - * @param type The type of Trait to retrieve. + * \param type The type of Trait to retrieve. * - * @return The first attached trait that matches @p type. + * \return The first attached trait that matches @p type. */ Trait *GetTrait(TraitType type); - /** - * Release this vBucket. + /** \brief Release this vBucket. * * This function does not result in any Trait callbacks being invoked or any * Blob links to be deleted. It simply decrements the reference count on this @@ -168,29 +164,32 @@ class VBucket { * reference count is 1. 
I.e., each rank that is not destroying the VBucket * must release it. * - * @param ctx Currently unused. + * \return A Status. + */ + Status Release(); + + /** \overload * - * @return A Status. + * \param ctx Currently unused. */ Status Release(Context &ctx); - /** \todo Release */ - Status Release(); - /** - * Destroy this VBucket. + /** \brief Destroy this VBucket. * * Releases all resources associated with this VBucket. If it is opened again, * it will be created from scratch. Unlinks all linked Blobs (which will * invoke each attached Trait's Trait::onUnlinkFn function), and detaches all * attached Traits, invoking Trait::onDetachFn. * - * @param ctx Currently unused. + * \return \status + */ + Status Destroy(); + + /** \overload * - * @return A Status. + * \param ctx Currently unused. */ Status Destroy(Context &ctx); - /** \todo Destroy */ - Status Destroy(); }; // class VBucket } // namespace api From 1243772c5643213863e4c87d9a67f36bc2900a92 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Tue, 13 Sep 2022 15:38:46 -0500 Subject: [PATCH 10/15] VBucket API docs --- src/api/vbucket.h | 56 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/src/api/vbucket.h b/src/api/vbucket.h index 45ac4f57d..3a80142cd 100644 --- a/src/api/vbucket.h +++ b/src/api/vbucket.h @@ -33,35 +33,42 @@ namespace api { */ class VBucket { private: - /** vbucket name */ + /** The user-facing name of this VBucket. */ std::string name_; - /** vbucket ID */ + /** The internal ID of this VBucket. */ VBucketID id_; - /** Traits attached to this vbucket */ + /** Traits attached to this vbucket. */ std::list attached_traits_; - /** internal Hermes object owned by vbucket */ + /** The Hermes instance this VBucket is stored in. */ std::shared_ptr hermes_; - /** The Context for this VBucket. \todo Why do we need that? */ + /** The Context for this VBucket. Overrides the global default Context. 
*/ Context ctx_; public: - /** + /** \brief * */ VBucket(std::string initial_name, std::shared_ptr const &h, Context ctx = Context()); - /** + /** \brief * */ ~VBucket(); - /** + /** \brief Return \bool{this VBucket is valid} * + * A VBucket is valid if it has a non-NULL ID, meaning it has been registered + * in the Hermes system. + * + * \return \bool{this VBucket is valid} */ bool IsValid() const; - /** get the name of vbucket */ + /** \brief Return the name of this VBucket. + * + * \return The name of this VBucket. + */ std::string GetName() const { return this->name_; } /** @@ -73,20 +80,23 @@ class VBucket { /** * Link a Blob to this VBucket. * - * Adds Blob @p blob_name in Bucket @p bucket_name to this VBucket's list of + * Adds Blob \p blob_name in Bucket \p bucket_name to this VBucket's list of * Blobs. Additional calls the Trait::OnLinkFn function on the Blob for each * attached Trait. * * \param blob_name The name of the Blob to link. * \param bucket_name The name of the Bucket containing the Blob to link. - * \param ctx Currently unused. * * \return \status */ - Status Link(std::string blob_name, std::string bucket_name, Context &ctx); - /** \todo Link */ Status Link(std::string blob_name, std::string bucket_name); + /** \overload + * + * \param ctx Currently unused. + */ + Status Link(std::string blob_name, std::string bucket_name, Context &ctx); + /** * Unlink a Blob from this VBucket. * @@ -117,7 +127,7 @@ class VBucket { /** could return iterator */ std::vector GetLinks(Context &ctx); - /** \brie Attach a Trait to this VBucket. + /** \brief Attach a Trait to this VBucket. * * Calls the Trait::onAttachFn function of \p trait on each Blob that's linked * to this VBucket. @@ -127,24 +137,34 @@ class VBucket { * \return \status */ Status Attach(Trait *trait); - /** \overload * * \param ctx Currently unused. */ Status Attach(Trait *trait, Context &ctx); - /** \brief Detach a trait from this VBucket. + /** \brief Detach a trait from this VBucket.
+ * + * \param trait The Trait to detach. * + * \return \status */ Status Detach(Trait *trait); /** \overload * + * \param ctx Currently unused. */ Status Detach(Trait *trait, Context &ctx); - /** retrieves the subset of attached traits satisfying pred */ + /** \brief Retrieves the subset of attached traits satisfying the Predicate \p pred. + * + * \todo \p pred is currently ignored and this function returns all attached + * traits. + * + * \param pred \todo + * \param ctx Currently unused. + */ template std::vector GetTraits(Predicate pred, Context &ctx); @@ -152,7 +172,7 @@ class VBucket { * * \param type The type of Trait to retrieve. * - * \return The first attached trait that matches @p type. + * \return The first attached trait that matches \p type. */ Trait *GetTrait(TraitType type); From 00e5153844ff04166349e9c2e27729954af1fee6 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Tue, 13 Sep 2022 15:45:32 -0500 Subject: [PATCH 11/15] Fix compilation --- src/api/vbucket.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/vbucket.cc b/src/api/vbucket.cc index ec9e6c6da..d9037180a 100644 --- a/src/api/vbucket.cc +++ b/src/api/vbucket.cc @@ -24,7 +24,7 @@ namespace hermes { namespace api { VBucket::VBucket(std::string initial_name, std::shared_ptr const &h, - Context ctx = Context()) + Context ctx) : name_(initial_name), id_({{0, 0}}), attached_traits_(), From 3223daf7b59c5dcf46b85cc6d5167ff2df576574 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Wed, 14 Sep 2022 07:58:11 -0500 Subject: [PATCH 12/15] Finished documenting VBucket.h --- src/api/vbucket.cc | 8 +++++++- src/api/vbucket.h | 21 +++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/api/vbucket.cc b/src/api/vbucket.cc index d9037180a..ea6e16ea6 100644 --- a/src/api/vbucket.cc +++ b/src/api/vbucket.cc @@ -48,7 +48,13 @@ VBucket::~VBucket() { } } -bool VBucket::IsValid() const { return !IsNullVBucketId(id_); } +bool VBucket::IsValid() const { +
return !IsNullVBucketId(id_); +} + +std::string VBucket::GetName() const { + return this->name_; +} void VBucket::WaitForBackgroundFlush() { AwaitAsyncFlushingTasks(&hermes_->context_, &hermes_->rpc_, id_); diff --git a/src/api/vbucket.h b/src/api/vbucket.h index 3a80142cd..f3e19aa28 100644 --- a/src/api/vbucket.h +++ b/src/api/vbucket.h @@ -45,14 +45,27 @@ class VBucket { Context ctx_; public: - /** \brief + /** \brief Create or open a VBucket. * + * If the VBucket \p initial_name doesn't already exist, it is created and + * registered in Hermes. If it does exist, it is opened and its reference + * count is incremented. Once a VBucket is created, it can be opened on any + * rank or node. + * + * \param initial_name The desired name of the VBucket. + * \param hermes An initialized Hermes instance. + * \param \ctx{VBucket} + * + * \pre The Hermes instance \p hermes must be initialized. */ - VBucket(std::string initial_name, std::shared_ptr const &h, + VBucket(std::string initial_name, std::shared_ptr const &hermes, Context ctx = Context()); - /** \brief + /** \brief Close a VBucket. * + * This does not delete the VBucket from Hermes, it merely decrements the + * reference count. To delete the VBucket and all associated metadata use + * VBucket::Destroy. */ ~VBucket(); @@ -69,7 +82,7 @@ class VBucket { * * \return The name of this VBucket.
*/ - std::string GetName() const { return this->name_; } + std::string GetName() const; /** * Blocks until all outstanding asynchronous flushing tasks associated with From 66c7248460fd1120be6cf09a292dc1e2825ba182 Mon Sep 17 00:00:00 2001 From: Chris Hogan Date: Wed, 14 Sep 2022 17:00:40 -0500 Subject: [PATCH 13/15] Trait API docs --- src/api/traits.h | 71 +++++++++++++++++++++++++++++++++++++++------- src/hermes_types.h | 8 ++++-- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/src/api/traits.h b/src/api/traits.h index 5006a1179..53c3f17bd 100644 --- a/src/api/traits.h +++ b/src/api/traits.h @@ -36,60 +36,111 @@ typedef BlobInfo TraitInput; struct Trait; using HermesPtr = std::shared_ptr; +// TODO(chogan): I don't think we need to pass a Trait* to these callbacks +// anymore. That is a relic of an old implementation. + /** Callback for blob->vbucket link events */ typedef std::function OnLinkCallback; /** Callback for trait->vbucket attach events */ typedef std::function OnAttachCallback; -/** Traits represent vbucket behavior */ +/** \brief Base class for Trait%s, which can attach functionality to VBucket%s. + * + * To add functionality to a VBucket, inherit from this class and implement the + * various callbacks. + */ struct Trait { /** The trait's ID */ TraitID id; - /** \todo ??? */ + /** IDs of Trait%s whose functionality conflict with this Trait. */ std::vector conflict_traits; - /** The trait's type */ + /** The trait's type. */ TraitType type; - /** Callback for trait->vbucket attach events */ + /** Callback for trait->vbucket attach events. */ OnAttachCallback onAttachFn; - /** Callback for trait-vbucket detach events. */ OnAttachCallback onDetachFn; - /** Callback for blob->vbucket link events */ + /** Callback for blob->vbucket link events. 
*/ OnLinkCallback onLinkFn; + /** Callback for blob- &conflict_traits, TraitType type); }; -/** (File) Persistence trait */ +/** \brief Enable persisting a VBucket's linked Blob%s to permanent + * storage. + * + */ struct PersistTrait : public Trait { + /** The name of the file to flush the Blob%s to. */ std::string filename; + /** Maps Blob names to offsets within a file. */ std::unordered_map offset_map; + /** \bool{flushing data should block until finished} */ bool synchronous; + /** */ explicit PersistTrait(bool synchronous); + /** */ explicit PersistTrait(const std::string &filename, const std::unordered_map &offset_map, bool synchronous = false); + /** + * + */ void onAttach(HermesPtr hermes, VBucketID id, Trait *trait); + + /** \brief Currently a no-op. */ void onDetach(HermesPtr hermes, VBucketID id, Trait *trait); + + /** + * + */ void onLink(HermesPtr hermes, TraitInput &input, Trait *trait); + + /** \brief Currently a no-op. */ void onUnlink(HermesPtr hermes, TraitInput &input, Trait *trait); }; +/** \brief Marks the Blob%s in a VBucket as write-only. + * + * If we know that certain Blob%s are write-only, we can asynchronously and + * eagerly flush buffered data to the final destination. + * + */ struct WriteOnlyTrait : public Trait { + /** */ WriteOnlyTrait(); + /** \brief Currently a no-op. */ void onAttach(HermesPtr hermes, VBucketID id, Trait *trait); + + /** \brief Currently a no-op. */ void onDetach(HermesPtr hermes, VBucketID id, Trait *trait); + + /** + * + */ void onLink(HermesPtr hermes, TraitInput &input, Trait *trait); + + /** \brief Currently a no-op. */ void onUnlink(HermesPtr hermes, TraitInput &input, Trait *trait); - void onGet(HermesPtr hermes, TraitInput &input, Trait *trait); }; } // namespace api diff --git a/src/hermes_types.h b/src/hermes_types.h index 1dd4c1255..546a57528 100644 --- a/src/hermes_types.h +++ b/src/hermes_types.h @@ -375,10 +375,12 @@ typedef u64 TraitID; namespace api { -/** Trait types */ +/** \brief Trait types.
+ * + */ enum class TraitType : u8 { - META = 0, - DATA = 1, + META = 0, /**< The Trait only modifies metadata. */ + DATA = 1, /**< The Trait modifies raw data (Blob%s). */ PERSIST = 2, }; From e937891d90dedb9a4d89ff3683a1748387b973ec Mon Sep 17 00:00:00 2001 From: "H. Joe Lee" Date: Mon, 3 Oct 2022 11:25:40 -0500 Subject: [PATCH 14/15] Remove % from comment. I think it's typo. --- src/api/bucket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/bucket.h b/src/api/bucket.h index 6a95e40c5..41302d1ea 100644 --- a/src/api/bucket.h +++ b/src/api/bucket.h @@ -108,7 +108,7 @@ class Bucket { /** \brief Put a Blob in this Bucket. * - * Uses the Bucket%'s saved Context. + * Uses the Bucket's saved Context. * * \param name The name of the Blob to put. * \param data The Blob data. From f850613243dd7dfccbd267feee89df90037fa83f Mon Sep 17 00:00:00 2001 From: Hyo-Kyung Lee Date: Wed, 12 Oct 2022 12:17:20 -0500 Subject: [PATCH 15/15] Add documentation for CI script. (#388) --- ci/install_deps.sh | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/ci/install_deps.sh b/ci/install_deps.sh index 2886c7e54..ae2ecc9df 100755 --- a/ci/install_deps.sh +++ b/ci/install_deps.sh @@ -1,14 +1,31 @@ #!/bin/bash +# Hermes dependency installation script +# +# Hermes depends on the following packages (in alphabetical order): +# +# Catch2 +# GLOG +# GLPK +# HDF5 +# IOR (for performance testing) +# Thallium +# yaml-cpp +# +# This script will build and install them via Spack from source +# because Hermes requires a very specific version and configuration options +# for each package. + set -x set -e set -o pipefail +# Change this especially when your $HOME doesn't have enough disk space. 
INSTALL_DIR="${HOME}/${LOCAL}" + SPACK_DIR=${INSTALL_DIR}/spack MOCHI_REPO_DIR=${INSTALL_DIR}/mochi-spack-packages THALLIUM_VERSION=0.10.0 -GOTCHA_VERSION=develop CATCH2_VERSION=3.0.1 SPACK_VERSION=0.18.1 HDF5_VERSION=1_13_1 @@ -26,7 +43,11 @@ set +x . ${SPACK_DIR}/share/spack/setup-env.sh set -x +# This will allow Spack to skip building some packages that are directly +# available from the system. For example, autoconf, cmake, m4, etc. +# Modify ci/packages.yaml to skip building compilers or build tools via Spack. cp ci/packages.yaml ${SPACK_DIR}/etc/spack/packages.yaml + MOCHI_REPO=https://github.com/mochi-hpc/mochi-spack-packages.git # TODO(chogan): We pin this commit because in the past using the HEAD of 'main' # has been unstable. We update at controlled intervals rather than putting out @@ -38,6 +59,8 @@ pushd ${MOCHI_REPO_DIR} git checkout ${MOCHI_SPACK_PACKAGES_COMMIT} popd +# This will override Spack's default package repository to allow building +# a custom package when the same package is available from Spack. spack repo add ${MOCHI_REPO_DIR} spack repo add ./ci/hermes @@ -52,11 +75,15 @@ ALL_SPECS="${THALLIUM_SPEC} ${CATCH2_SPEC} ${GLPK_SPEC} ${GLOG_SPEC} ${HDF5_SPEC spack install ${ALL_SPECS} SPACK_STAGING_DIR=~/spack_staging mkdir -p ${SPACK_STAGING_DIR} + +# Spack installation directory has hash value. +# This will simplify and consolidate the installation path. spack view --verbose symlink ${SPACK_STAGING_DIR} ${ALL_SPECS} +# Copy what Spack installed in a temporary location to your desired location. cp -LRnv ${SPACK_STAGING_DIR}/* ${INSTALL_DIR} -# IOR +# Install a custom IOR that has patches for Hermes for performance testing. pushd ~ git clone https://github.com/ChristopherHogan/ior pushd ior