From dad8328f2d502d18923c35f7b86a0cc2ccec133a Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 18 Jul 2016 09:31:40 -0400 Subject: [PATCH 01/16] journal: helper class for organizing optional settings Additional runtime configuration settings will be needed. The new class will avoid the need to expand the constructor. Signed-off-by: Jason Dillaman --- src/journal/JournalMetadata.cc | 9 +++++---- src/journal/JournalMetadata.h | 5 +++-- src/journal/Journaler.cc | 13 +++++++------ src/journal/Journaler.h | 7 ++++--- src/journal/Makefile.am | 1 + src/journal/Settings.h | 17 +++++++++++++++++ src/librbd/Journal.cc | 24 +++++++++++------------- src/test/journal/RadosTestFixture.cc | 9 ++++++--- src/test/journal/mock/MockJournaler.h | 5 +++-- src/test/journal/test_Journaler.cc | 9 +++++---- src/test/librbd/fsx.cc | 9 +++++---- src/test/librbd/journal/test_Entries.cc | 3 ++- src/test/librbd/test_mirroring.cc | 3 ++- src/test/rbd_mirror/test_ImageSync.cc | 3 ++- src/tools/rbd/action/Journal.cc | 3 ++- src/tools/rbd_mirror/ImageReplayer.cc | 7 +++++-- 16 files changed, 80 insertions(+), 47 deletions(-) create mode 100644 src/journal/Settings.h diff --git a/src/journal/JournalMetadata.cc b/src/journal/JournalMetadata.cc index 8fc5634a882e0..63c67f21cdb1a 100644 --- a/src/journal/JournalMetadata.cc +++ b/src/journal/JournalMetadata.cc @@ -402,9 +402,9 @@ JournalMetadata::JournalMetadata(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, librados::IoCtx &ioctx, const std::string &oid, const std::string &client_id, - double commit_interval) + const Settings &settings) : RefCountedObject(NULL, 0), m_cct(NULL), m_oid(oid), - m_client_id(client_id), m_commit_interval(commit_interval), m_order(0), + m_client_id(client_id), m_settings(settings), m_order(0), m_splay_width(0), m_pool_id(-1), m_initialized(false), m_work_queue(work_queue), m_timer(timer), m_timer_lock(timer_lock), m_lock("JournalMetadata::m_lock"), m_commit_tid(0), m_watch_ctx(this), @@ -795,7 +795,8 @@ void JournalMetadata::schedule_commit_task() { assert(m_commit_position_ctx != nullptr); if (m_commit_position_task_ctx == NULL) { m_commit_position_task_ctx = new C_CommitPositionTask(this); - m_timer->add_event_after(m_commit_interval, m_commit_position_task_ctx); + m_timer->add_event_after(m_settings.commit_interval, + m_commit_position_task_ctx); } } @@ -1045,7 +1046,7 @@ std::ostream &operator<<(std::ostream &os, << "active_set=" << jm.m_active_set << ", " << "client_id=" << jm.m_client_id << ", " << "commit_tid=" << jm.m_commit_tid << ", " - << "commit_interval=" << jm.m_commit_interval << ", " + << "commit_interval=" << jm.m_settings.commit_interval << ", " << "commit_position=" << jm.m_commit_position << ", " << "registered_clients=" << jm.m_registered_clients << "]"; return os; diff --git a/src/journal/JournalMetadata.h b/src/journal/JournalMetadata.h index c2fa711b9f8e1..046e77dac7b47 100644 --- a/src/journal/JournalMetadata.h +++ b/src/journal/JournalMetadata.h @@ -14,6 +14,7 @@ #include "cls/journal/cls_journal_types.h" #include "journal/AsyncOpTracker.h" #include "journal/JournalMetadataListener.h" +#include "journal/Settings.h" #include #include #include @@ -44,7 +45,7 @@ class JournalMetadata : public RefCountedObject, boost::noncopyable { JournalMetadata(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, librados::IoCtx &ioctx, const std::string &oid, - const std::string &client_id, double commit_interval); + const std::string &client_id, const Settings &settings); ~JournalMetadata(); void init(Context *on_init); @@ -287,7 +288,7 @@ class JournalMetadata : public RefCountedObject, boost::noncopyable { CephContext *m_cct; std::string m_oid; std::string m_client_id; - double m_commit_interval; + Settings m_settings; uint8_t m_order; uint8_t m_splay_width; diff --git a/src/journal/Journaler.cc b/src/journal/Journaler.cc index 7bfbbe956e494..05d80693dc861 100644 --- a/src/journal/Journaler.cc +++ b/src/journal/Journaler.cc @@ -68,25 +68,26 @@ Journaler::Threads::~Threads() { Journaler::Journaler(librados::IoCtx &header_ioctx, const std::string &journal_id, - const std::string &client_id, double commit_interval) + const std::string &client_id, const Settings &settings) : m_threads(new Threads(reinterpret_cast(header_ioctx.cct()))), m_client_id(client_id) { set_up(m_threads->work_queue, m_threads->timer, &m_threads->timer_lock, - header_ioctx, journal_id, commit_interval); + header_ioctx, journal_id, settings); } Journaler::Journaler(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, librados::IoCtx &header_ioctx, const std::string &journal_id, - const std::string &client_id, double commit_interval) + const std::string &client_id, const Settings &settings) : m_client_id(client_id) { set_up(work_queue, timer, timer_lock, header_ioctx, journal_id, - commit_interval); + settings); } void Journaler::set_up(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, librados::IoCtx &header_ioctx, - const std::string &journal_id, double commit_interval) { + const std::string &journal_id, + const Settings &settings) { m_header_ioctx.dup(header_ioctx); m_cct = reinterpret_cast(m_header_ioctx.cct()); @@ -95,7 +96,7 @@ void Journaler::set_up(ContextWQ *work_queue, SafeTimer *timer, m_metadata = new JournalMetadata(work_queue, timer, timer_lock, m_header_ioctx, m_header_oid, m_client_id, - commit_interval); + settings); m_metadata->get(); } diff --git a/src/journal/Journaler.h b/src/journal/Journaler.h index afa3c88075473..93a89bb1ff175 100644 --- a/src/journal/Journaler.h +++ b/src/journal/Journaler.h @@ -28,6 +28,7 @@ class JournalRecorder; class JournalTrimmer; class ReplayEntry; class ReplayHandler; +class Settings; class Journaler { public: @@ -51,10 +52,10 @@ class Journaler { const std::string &journal_id); Journaler(librados::IoCtx &header_ioctx, const std::string &journal_id, - const std::string &client_id, double commit_interval); + const std::string &client_id, const Settings &settings); Journaler(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, librados::IoCtx &header_ioctx, const std::string &journal_id, - const std::string &client_id, double commit_interval); + const std::string &client_id, const Settings &settings); ~Journaler(); int exists(bool *header_exists) const; @@ -145,7 +146,7 @@ class Journaler { void set_up(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, librados::IoCtx &header_ioctx, const std::string &journal_id, - double commit_interval); + const Settings &settings); int init_complete(); void create_player(ReplayHandler *replay_handler); diff --git a/src/journal/Makefile.am b/src/journal/Makefile.am index 8e222d95d6b7a..ad4d54dc48850 100644 --- a/src/journal/Makefile.am +++ b/src/journal/Makefile.am @@ -31,6 +31,7 @@ noinst_HEADERS += \ journal/ObjectRecorder.h \ journal/ReplayEntry.h \ journal/ReplayHandler.h \ + journal/Settings.h \ journal/Utils.h DENCODER_DEPS += libjournal.la diff --git a/src/journal/Settings.h b/src/journal/Settings.h new file mode 100644 index 0000000000000..958073414588a --- /dev/null +++ b/src/journal/Settings.h @@ -0,0 +1,17 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_JOURNAL_SETTINGS_H +#define CEPH_JOURNAL_SETTINGS_H + +#include "include/int_types.h" + +namespace journal { + +struct Settings { + double commit_interval = 5; ///< commit position throttle (in secs) +}; + +} // namespace journal + +#endif // # CEPH_JOURNAL_SETTINGS_H diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc index 5b8e305a908a8..dda73e38672d4 100644 --- a/src/librbd/Journal.cc +++ b/src/librbd/Journal.cc @@ -11,6 +11,7 @@ #include "cls/journal/cls_journal_types.h" #include "journal/Journaler.h" #include "journal/ReplayEntry.h" +#include "journal/Settings.h" #include "common/errno.h" #include "common/Timer.h" #include "common/WorkQueue.h" @@ -374,8 +375,7 @@ int Journal::create(librados::IoCtx &io_ctx, const std::string &image_id, pool_id = data_io_ctx.get_id(); } - Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, - cct->_conf->rbd_journal_commit_age); + Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, {}); int r = journaler.create(order, splay_width, pool_id); if (r < 0) { @@ -413,8 +413,7 @@ int Journal::remove(librados::IoCtx &io_ctx, const std::string &image_id) { CephContext *cct = reinterpret_cast(io_ctx.cct()); ldout(cct, 5) << __func__ << ": image=" << image_id << dendl; - Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, - cct->_conf->rbd_journal_commit_age); + Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, {}); bool journal_exists; int r = journaler.exists(&journal_exists); @@ -455,8 +454,7 @@ int Journal::reset(librados::IoCtx &io_ctx, const std::string &image_id) { CephContext *cct = reinterpret_cast(io_ctx.cct()); ldout(cct, 5) << __func__ << ": image=" << image_id << dendl; - Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, - cct->_conf->rbd_journal_commit_age); + Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, {}); C_SaferCond cond; journaler.init(&cond); @@ -533,8 +531,7 @@ int Journal::get_tag_owner(IoCtx& io_ctx, std::string& image_id, CephContext *cct = (CephContext *)io_ctx.cct(); ldout(cct, 20) << __func__ << dendl; - Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, - cct->_conf->rbd_journal_commit_age); + Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, {}); cls::journal::Client client; journal::ImageClientMeta client_meta; @@ -553,8 +550,7 @@ int Journal::request_resync(I *image_ctx) { CephContext *cct = image_ctx->cct; ldout(cct, 20) << __func__ << dendl; - Journaler journaler(image_ctx->md_ctx, image_ctx->id, IMAGE_CLIENT_ID, - image_ctx->cct->_conf->rbd_journal_commit_age); + Journaler journaler(image_ctx->md_ctx, image_ctx->id, IMAGE_CLIENT_ID, {}); cls::journal::Client client; journal::ImageClientMeta client_meta; @@ -592,8 +588,7 @@ int Journal::promote(I *image_ctx) { CephContext *cct = image_ctx->cct; ldout(cct, 20) << __func__ << dendl; - Journaler journaler(image_ctx->md_ctx, image_ctx->id, IMAGE_CLIENT_ID, - image_ctx->cct->_conf->rbd_journal_commit_age); + Journaler journaler(image_ctx->md_ctx, image_ctx->id, IMAGE_CLIENT_ID, {}); cls::journal::Client client; journal::ImageClientMeta client_meta; @@ -1194,9 +1189,12 @@ void Journal::create_journaler() { assert(m_journaler == NULL); transition_state(STATE_INITIALIZING, 0); + ::journal::Settings settings; + settings.commit_interval = m_image_ctx.journal_commit_age; + m_journaler = new Journaler(m_work_queue, m_timer, m_timer_lock, m_image_ctx.md_ctx, m_image_ctx.id, - IMAGE_CLIENT_ID, m_image_ctx.journal_commit_age); + IMAGE_CLIENT_ID, settings); m_journaler->init(create_async_context_callback( m_image_ctx, create_context_callback< Journal, &Journal::handle_initialized>(this))); diff --git a/src/test/journal/RadosTestFixture.cc b/src/test/journal/RadosTestFixture.cc index f57e5aed8c42c..40ff485203aff 100644 --- a/src/test/journal/RadosTestFixture.cc +++ b/src/test/journal/RadosTestFixture.cc @@ -5,6 +5,7 @@ #include "cls/journal/cls_journal_client.h" #include "include/stringify.h" #include "common/WorkQueue.h" +#include "journal/Settings.h" RadosTestFixture::RadosTestFixture() : m_timer_lock("m_timer_lock"), m_timer(NULL), m_listener(this) { @@ -67,10 +68,12 @@ int RadosTestFixture::create(const std::string &oid, uint8_t order, journal::JournalMetadataPtr RadosTestFixture::create_metadata( const std::string &oid, const std::string &client_id, - double commit_internal) { + double commit_interval) { + journal::Settings settings; + settings.commit_interval = commit_interval; + journal::JournalMetadataPtr metadata(new journal::JournalMetadata( - m_work_queue, m_timer, &m_timer_lock, m_ioctx, oid, client_id, - commit_internal)); + m_work_queue, m_timer, &m_timer_lock, m_ioctx, oid, client_id, settings)); m_metadatas.push_back(metadata); return metadata; } diff --git a/src/test/journal/mock/MockJournaler.h b/src/test/journal/mock/MockJournaler.h index ee8212d8dc2e0..3912a47ff4e93 100644 --- a/src/test/journal/mock/MockJournaler.h +++ b/src/test/journal/mock/MockJournaler.h @@ -20,6 +20,7 @@ class SafeTimer; namespace journal { struct ReplayHandler; +struct Settings; struct MockFuture { static MockFuture *s_instance; @@ -136,13 +137,13 @@ struct MockJournaler { struct MockJournalerProxy { template MockJournalerProxy(IoCtxT &header_ioctx, const std::string &, - const std::string &, double) { + const std::string &, const Settings&) { MockJournaler::get_instance().construct(); } MockJournalerProxy(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, librados::IoCtx &header_ioctx, const std::string &journal_id, - const std::string &client_id, double commit_interval) { + const std::string &client_id, const Settings&) { MockJournaler::get_instance().construct(); } diff --git a/src/test/journal/test_Journaler.cc b/src/test/journal/test_Journaler.cc index 4a4ecbae01995..d40069505c108 100644 --- a/src/test/journal/test_Journaler.cc +++ b/src/test/journal/test_Journaler.cc @@ -2,6 +2,7 @@ // vim: ts=8 sw=2 smarttab #include "journal/Journaler.h" +#include "journal/Settings.h" #include "include/stringify.h" #include "gtest/gtest.h" #include "test/librados/test.h" @@ -21,7 +22,7 @@ class TestJournaler : public RadosTestFixture { RadosTestFixture::SetUp(); m_journal_id = get_temp_journal_id(); m_journaler = new journal::Journaler(m_work_queue, m_timer, &m_timer_lock, - m_ioctx, m_journal_id, CLIENT_ID, 5); + m_ioctx, m_journal_id, CLIENT_ID, {}); } virtual void TearDown() { @@ -47,7 +48,7 @@ class TestJournaler : public RadosTestFixture { int register_client(const std::string &client_id, const std::string &desc) { journal::Journaler journaler(m_work_queue, m_timer, &m_timer_lock, - m_ioctx, m_journal_id, client_id, 5); + m_ioctx, m_journal_id, client_id, {}); bufferlist data; data.append(desc); C_SaferCond cond; @@ -57,7 +58,7 @@ class TestJournaler : public RadosTestFixture { int update_client(const std::string &client_id, const std::string &desc) { journal::Journaler journaler(m_work_queue, m_timer, &m_timer_lock, - m_ioctx, m_journal_id, client_id, 5); + m_ioctx, m_journal_id, client_id, {}); bufferlist data; data.append(desc); C_SaferCond cond; @@ -67,7 +68,7 @@ class TestJournaler : public RadosTestFixture { int unregister_client(const std::string &client_id) { journal::Journaler journaler(m_work_queue, m_timer, &m_timer_lock, - m_ioctx, m_journal_id, client_id, 5); + m_ioctx, m_journal_id, client_id, {}); C_SaferCond cond; journaler.unregister_client(&cond); return cond.wait(); diff --git a/src/test/librbd/fsx.cc b/src/test/librbd/fsx.cc index 963322d88852b..8fba0701d450e 100644 --- a/src/test/librbd/fsx.cc +++ b/src/test/librbd/fsx.cc @@ -53,6 +53,7 @@ #include "journal/Journaler.h" #include "journal/ReplayEntry.h" #include "journal/ReplayHandler.h" +#include "journal/Settings.h" #include @@ -323,7 +324,7 @@ int register_journal(rados_ioctx_t ioctx, const char *image_name) { return r; } - journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, 0); + journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {}); r = journaler.register_client(bufferlist()); if (r < 0) { simple_err("failed to register journal client", r); @@ -342,7 +343,7 @@ int unregister_journal(rados_ioctx_t ioctx, const char *image_name) { return r; } - journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, 0); + journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {}); r = journaler.unregister_client(); if (r < 0) { simple_err("failed to unregister journal client", r); @@ -394,7 +395,7 @@ int replay_journal(rados_ioctx_t ioctx, const char *image_name, return r; } - journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, 0); + journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {}); C_SaferCond init_ctx; journaler.init(&init_ctx); BOOST_SCOPE_EXIT_ALL( (&journaler) ) { @@ -407,7 +408,7 @@ int replay_journal(rados_ioctx_t ioctx, const char *image_name, return r; } - journal::Journaler replay_journaler(io_ctx, replay_image_id, "", 0); + journal::Journaler replay_journaler(io_ctx, replay_image_id, "", {}); C_SaferCond replay_init_ctx; replay_journaler.init(&replay_init_ctx); diff --git a/src/test/librbd/journal/test_Entries.cc b/src/test/librbd/journal/test_Entries.cc index bd984fd16928e..3fd24f27c688c 100644 --- a/src/test/librbd/journal/test_Entries.cc +++ b/src/test/librbd/journal/test_Entries.cc @@ -11,6 +11,7 @@ #include "journal/Journaler.h" #include "journal/ReplayEntry.h" #include "journal/ReplayHandler.h" +#include "journal/Settings.h" #include #include @@ -66,7 +67,7 @@ class TestJournalEntries : public TestFixture { journal::Journaler *create_journaler(librbd::ImageCtx *ictx) { journal::Journaler *journaler = new journal::Journaler( - ictx->md_ctx, ictx->id, "dummy client", 1); + ictx->md_ctx, ictx->id, "dummy client", {}); int r = journaler->register_client(bufferlist()); if (r < 0) { diff --git a/src/test/librbd/test_mirroring.cc b/src/test/librbd/test_mirroring.cc index 912f098985b2e..fcd98ec767f0b 100644 --- a/src/test/librbd/test_mirroring.cc +++ b/src/test/librbd/test_mirroring.cc @@ -24,6 +24,7 @@ #include "librbd/Operations.h" #include "librbd/journal/Types.h" #include "journal/Journaler.h" +#include "journal/Settings.h" #include #include #include @@ -267,7 +268,7 @@ class TestMirroring : public TestFixture { "remote-image-id", {{"sync-point-snap", boost::none}}, {}); librbd::journal::ClientData client_data(peer_client_meta); - journal::Journaler journaler(io_ctx, image_id, "peer-client", 5); + journal::Journaler journaler(io_ctx, image_id, "peer-client", {}); C_SaferCond init_ctx; journaler.init(&init_ctx); ASSERT_EQ(-ENOENT, init_ctx.wait()); diff --git a/src/test/rbd_mirror/test_ImageSync.cc b/src/test/rbd_mirror/test_ImageSync.cc index eb93e6d7e2de2..6615cb834c902 100644 --- a/src/test/rbd_mirror/test_ImageSync.cc +++ b/src/test/rbd_mirror/test_ImageSync.cc @@ -5,6 +5,7 @@ #include "include/stringify.h" #include "include/rbd/librbd.hpp" #include "journal/Journaler.h" +#include "journal/Settings.h" #include "librbd/AioImageRequestWQ.h" #include "librbd/ExclusiveLock.h" #include "librbd/ImageCtx.h" @@ -54,7 +55,7 @@ class TestImageSync : public TestFixture { m_remote_journaler = new ::journal::Journaler( m_threads->work_queue, m_threads->timer, &m_threads->timer_lock, - m_remote_io_ctx, m_remote_image_ctx->id, "mirror-uuid", 5); + m_remote_io_ctx, m_remote_image_ctx->id, "mirror-uuid", {}); m_client_meta = {"image-id"}; diff --git a/src/tools/rbd/action/Journal.cc b/src/tools/rbd/action/Journal.cc index 0c85c26a9a291..ca2620a812783 100644 --- a/src/tools/rbd/action/Journal.cc +++ b/src/tools/rbd/action/Journal.cc @@ -20,6 +20,7 @@ #include "journal/Journaler.h" #include "journal/ReplayEntry.h" #include "journal/ReplayHandler.h" +#include "journal/Settings.h" #include "librbd/journal/Types.h" namespace rbd { @@ -171,7 +172,7 @@ class Journaler : public ::journal::Journaler { public: Journaler(librados::IoCtx& io_ctx, const std::string& journal_id, const std::string &client_id) : - ::journal::Journaler(io_ctx, journal_id, client_id, 5) { + ::journal::Journaler(io_ctx, journal_id, client_id, {}) { } int init() { diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index 8eb4e0760a5ef..c2b25d5e379e5 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -11,6 +11,7 @@ #include "global/global_context.h" #include "journal/Journaler.h" #include "journal/ReplayHandler.h" +#include "journal/Settings.h" #include "librbd/ExclusiveLock.h" #include "librbd/ImageCtx.h" #include "librbd/ImageState.h" @@ -372,12 +373,14 @@ void ImageReplayer::start(Context *on_finish, bool manual) } CephContext *cct = static_cast(m_local->cct()); - double commit_interval = cct->_conf->rbd_journal_commit_age; + journal::Settings settings; + settings.commit_interval = cct->_conf->rbd_journal_commit_age; + m_remote_journaler = new Journaler(m_threads->work_queue, m_threads->timer, &m_threads->timer_lock, m_remote_ioctx, m_remote_image_id, m_local_mirror_uuid, - commit_interval); + settings); bootstrap(); } From f7362e9a57e484fffd840ca0eef01778dcacb65b Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 18 Jul 2016 11:01:26 -0400 Subject: [PATCH 02/16] journal: optionally fetch entries in small chunks during replay Support fetching the full object or incremental chunks (with a minimum of at least a single decoded entry if available). Signed-off-by: Jason Dillaman --- src/journal/JournalMetadata.h | 3 + src/journal/JournalPlayer.cc | 3 +- src/journal/ObjectPlayer.cc | 114 ++++++++++---- src/journal/ObjectPlayer.h | 13 +- src/journal/Settings.h | 1 + src/test/journal/test_ObjectPlayer.cc | 210 ++++++++++++++------------ 6 files changed, 213 insertions(+), 131 deletions(-) diff --git a/src/journal/JournalMetadata.h b/src/journal/JournalMetadata.h index 046e77dac7b47..01116d7d1e016 100644 --- a/src/journal/JournalMetadata.h +++ b/src/journal/JournalMetadata.h @@ -74,6 +74,9 @@ class JournalMetadata : public RefCountedObject, boost::noncopyable { void get_tags(const boost::optional &tag_class, Tags *tags, Context *on_finish); + inline const Settings &get_settings() const { + return m_settings; + } inline const std::string &get_client_id() const { return m_client_id; } diff --git a/src/journal/JournalPlayer.cc b/src/journal/JournalPlayer.cc index 28905a2ed1304..a06ca620f7cd0 100644 --- a/src/journal/JournalPlayer.cc +++ b/src/journal/JournalPlayer.cc @@ -624,7 +624,8 @@ void JournalPlayer::fetch(uint64_t object_num) { C_Fetch *fetch_ctx = new C_Fetch(this, object_num); ObjectPlayerPtr object_player(new ObjectPlayer( m_ioctx, m_object_oid_prefix, object_num, m_journal_metadata->get_timer(), - m_journal_metadata->get_timer_lock(), m_journal_metadata->get_order())); + m_journal_metadata->get_timer_lock(), m_journal_metadata->get_order(), + m_journal_metadata->get_settings().max_fetch_bytes)); uint8_t splay_width = m_journal_metadata->get_splay_width(); m_object_players[object_num % splay_width][object_num] = object_player; diff --git a/src/journal/ObjectPlayer.cc b/src/journal/ObjectPlayer.cc index f86e3ef93702e..156da728e9713 100644 --- a/src/journal/ObjectPlayer.cc +++ b/src/journal/ObjectPlayer.cc @@ -15,13 +15,15 @@ namespace journal { ObjectPlayer::ObjectPlayer(librados::IoCtx &ioctx, const std::string &object_oid_prefix, uint64_t object_num, SafeTimer &timer, - Mutex &timer_lock, uint8_t order) + Mutex &timer_lock, uint8_t order, + uint64_t max_fetch_bytes) : RefCountedObject(NULL, 0), m_object_num(object_num), m_oid(utils::get_object_name(object_oid_prefix, m_object_num)), m_cct(NULL), m_timer(timer), m_timer_lock(timer_lock), m_order(order), + m_max_fetch_bytes(max_fetch_bytes > 0 ? max_fetch_bytes : 2 << order), m_watch_interval(0), m_watch_task(NULL), m_lock(utils::unique_lock_name("ObjectPlayer::m_lock", this)), - m_fetch_in_progress(false), m_read_off(0) { + m_fetch_in_progress(false) { m_ioctx.dup(ioctx); m_cct = reinterpret_cast(m_ioctx.cct()); } @@ -39,11 +41,12 @@ void ObjectPlayer::fetch(Context *on_finish) { ldout(m_cct, 10) << __func__ << ": " << m_oid << dendl; Mutex::Locker locker(m_lock); + assert(!m_fetch_in_progress); m_fetch_in_progress = true; C_Fetch *context = new C_Fetch(this, on_finish); librados::ObjectReadOperation op; - op.read(m_read_off, 2 << m_order, &context->read_bl, NULL); + op.read(m_read_off, m_max_fetch_bytes, &context->read_bl, NULL); op.set_op_flags2(CEPH_OSD_OP_FLAG_FADVISE_DONTNEED); librados::AioCompletion *rados_completion = @@ -95,14 +98,18 @@ void ObjectPlayer::front(Entry *entry) const { void ObjectPlayer::pop_front() { Mutex::Locker locker(m_lock); assert(!m_entries.empty()); + + auto &entry = m_entries.front(); + m_entry_keys.erase({entry.get_tag_tid(), entry.get_entry_tid()}); m_entries.pop_front(); } -int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl) { +int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl, + bool *refetch) { ldout(m_cct, 10) << __func__ << ": " << m_oid << ", r=" << r << ", len=" << bl.length() << dendl; - m_fetch_in_progress = false; + *refetch = false; if (r < 0) { return r; } @@ -111,25 +118,37 @@ int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl) { } Mutex::Locker locker(m_lock); + assert(m_fetch_in_progress); + m_read_off += bl.length(); m_read_bl.append(bl); + bool full_fetch = (m_max_fetch_bytes == 2U << m_order); + bool partial_entry = false; bool invalid = false; uint32_t invalid_start_off = 0; - bufferlist::iterator iter(&m_read_bl, m_read_off); + clear_invalid_range(m_read_bl_off, m_read_bl.length()); + bufferlist::iterator iter(&m_read_bl, 0); while (!iter.end()) { uint32_t bytes_needed; + uint32_t bl_off = iter.get_off(); if (!Entry::is_readable(iter, &bytes_needed)) { if (bytes_needed != 0) { - invalid_start_off = iter.get_off(); + invalid_start_off = m_read_bl_off + bl_off; invalid = true; - lderr(m_cct) << ": partial record at offset " << iter.get_off() - << dendl; + partial_entry = true; + if (full_fetch) { + lderr(m_cct) << ": partial record at offset " << invalid_start_off + << dendl; + } else { + ldout(m_cct, 20) << ": partial record detected, will re-fetch" + << dendl; + } break; } if (!invalid) { - invalid_start_off = iter.get_off(); + invalid_start_off = m_read_bl_off + bl_off; invalid = true; lderr(m_cct) << ": detected corrupt journal entry at offset " << invalid_start_off << dendl; @@ -138,18 +157,21 @@ int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl) { continue; } + Entry entry; + ::decode(entry, iter); + ldout(m_cct, 20) << ": " << entry << " decoded" << dendl; + + uint32_t entry_len = iter.get_off() - bl_off; if (invalid) { - uint32_t invalid_end_off = iter.get_off(); + // new corrupt region detected + uint32_t invalid_end_off = m_read_bl_off + bl_off; lderr(m_cct) << ": corruption range [" << invalid_start_off << ", " << invalid_end_off << ")" << dendl; - m_invalid_ranges.insert(invalid_start_off, invalid_end_off); + m_invalid_ranges.insert(invalid_start_off, + invalid_end_off - invalid_start_off); invalid = false; } - Entry entry; - ::decode(entry, iter); - ldout(m_cct, 20) << ": " << entry << " decoded" << dendl; - EntryKey entry_key(std::make_pair(entry.get_tag_tid(), entry.get_entry_tid())); if (m_entry_keys.find(entry_key) == m_entry_keys.end()) { @@ -158,20 +180,49 @@ int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl) { ldout(m_cct, 10) << ": " << entry << " is duplicate, replacing" << dendl; *m_entry_keys[entry_key] = entry; } + + // prune decoded / corrupted journal entries from front of bl + bufferlist sub_bl; + sub_bl.substr_of(m_read_bl, iter.get_off(), + m_read_bl.length() - iter.get_off()); + sub_bl.swap(m_read_bl); + iter = bufferlist::iterator(&m_read_bl, 0); + + // advance the decoded entry offset + m_read_bl_off += entry_len; } - m_read_off = m_read_bl.length(); if (invalid) { - uint32_t invalid_end_off = m_read_bl.length(); - lderr(m_cct) << ": corruption range [" << invalid_start_off - << ", " << invalid_end_off << ")" << dendl; - m_invalid_ranges.insert(invalid_start_off, invalid_end_off); + uint32_t invalid_end_off = m_read_bl_off + m_read_bl.length(); + if (!partial_entry) { + lderr(m_cct) << ": corruption range [" << invalid_start_off + << ", " << invalid_end_off << ")" << dendl; + } + m_invalid_ranges.insert(invalid_start_off, + invalid_end_off - invalid_start_off); } - if (!m_invalid_ranges.empty()) { - r = -EBADMSG; + if (!m_invalid_ranges.empty() && !partial_entry) { + return -EBADMSG; + } else if (partial_entry && (full_fetch || m_entries.empty())) { + *refetch = true; + return -EAGAIN; + } + + return 0; +} + +void ObjectPlayer::clear_invalid_range(uint32_t off, uint32_t len) { + // possibly remove previously partial record region + InvalidRanges decode_range; + decode_range.insert(off, len); + InvalidRanges intersect_range; + intersect_range.intersection_of(m_invalid_ranges, decode_range); + if (!intersect_range.empty()) { + ldout(m_cct, 20) << ": clearing invalid range: " << intersect_range + << dendl; + m_invalid_ranges.subtract(intersect_range); } - return r; } void ObjectPlayer::schedule_watch() { @@ -236,9 +287,20 @@ void ObjectPlayer::handle_watch_fetched(int r) { } void ObjectPlayer::C_Fetch::finish(int r) { - r = object_player->handle_fetch_complete(r, read_bl); - object_player.reset(); + bool refetch = false; + r = object_player->handle_fetch_complete(r, read_bl, &refetch); + { + Mutex::Locker locker(object_player->m_lock); + object_player->m_fetch_in_progress = false; + } + + if (refetch) { + object_player->fetch(on_finish); + return; + } + + object_player.reset(); on_finish->complete(r); } diff --git a/src/journal/ObjectPlayer.h b/src/journal/ObjectPlayer.h index d0809cec8fe26..74ec94bd31b7a 100644 --- a/src/journal/ObjectPlayer.h +++ b/src/journal/ObjectPlayer.h @@ -32,7 +32,7 @@ class ObjectPlayer : public RefCountedObject { ObjectPlayer(librados::IoCtx &ioctx, const std::string &object_oid_prefix, uint64_t object_num, SafeTimer &timer, Mutex &timer_lock, - uint8_t order); + uint8_t order, uint64_t max_fetch_bytes); ~ObjectPlayer(); inline const std::string &get_oid() const { @@ -77,8 +77,7 @@ class ObjectPlayer : public RefCountedObject { ObjectPlayerPtr object_player; Context *on_finish; bufferlist read_bl; - C_Fetch(ObjectPlayer *o, Context *ctx) - : object_player(o), on_finish(ctx) { + C_Fetch(ObjectPlayer *o, Context *ctx) : object_player(o), on_finish(ctx) { } virtual void finish(int r); }; @@ -104,6 +103,7 @@ class ObjectPlayer : public RefCountedObject { Mutex &m_timer_lock; uint8_t m_order; + uint64_t m_max_fetch_bytes; double m_watch_interval; Context *m_watch_task; @@ -111,7 +111,8 @@ class ObjectPlayer : public RefCountedObject { mutable Mutex m_lock; bool m_fetch_in_progress; bufferlist m_read_bl; - uint32_t m_read_off; + uint32_t m_read_off = 0; + uint32_t m_read_bl_off = 0; Entries m_entries; EntryKeys m_entry_keys; @@ -122,7 +123,9 @@ class ObjectPlayer : public RefCountedObject { bool m_unwatched = false; bool m_refetch_required = true; - int handle_fetch_complete(int r, const bufferlist &bl); + int handle_fetch_complete(int r, const bufferlist &bl, bool *refetch); + + void clear_invalid_range(uint32_t off, uint32_t len); void schedule_watch(); bool cancel_watch(); diff --git a/src/journal/Settings.h b/src/journal/Settings.h index 958073414588a..d8f5e7469301a 100644 --- a/src/journal/Settings.h +++ b/src/journal/Settings.h @@ -10,6 +10,7 @@ namespace journal { struct Settings { double commit_interval = 5; ///< commit position throttle (in secs) + uint64_t max_fetch_bytes = 0; ///< 0 implies no limit }; } // namespace journal diff --git a/src/test/journal/test_ObjectPlayer.cc b/src/test/journal/test_ObjectPlayer.cc index 67c35a1d12c64..ed4c0b667e68d 100644 --- a/src/test/journal/test_ObjectPlayer.cc +++ b/src/test/journal/test_ObjectPlayer.cc @@ -10,36 +10,79 @@ #include "test/librados/test.h" #include "test/journal/RadosTestFixture.h" +template class TestObjectPlayer : public RadosTestFixture { public: + static const uint32_t max_fetch_bytes = T::max_fetch_bytes; + journal::ObjectPlayerPtr create_object(const std::string &oid, uint8_t order) { journal::ObjectPlayerPtr object(new journal::ObjectPlayer( - m_ioctx, oid + ".", 0, *m_timer, m_timer_lock, order)); + m_ioctx, oid + ".", 0, *m_timer, m_timer_lock, order, + max_fetch_bytes)); return object; } + int fetch(journal::ObjectPlayerPtr object_player) { + while (true) { + C_SaferCond ctx; + object_player->clear_refetch_required(); + object_player->fetch(&ctx); + int r = ctx.wait(); + if (r < 0 || !object_player->refetch_required()) { + return r; + } + } + return 0; + } + + int watch_and_wait_for_entries(journal::ObjectPlayerPtr object_player, + journal::ObjectPlayer::Entries *entries, + size_t count) { + for (size_t i = 0; i < 50; ++i) { + object_player->get_entries(entries); + if (entries->size() == count) { + break; + } + + C_SaferCond ctx; + object_player->watch(&ctx, 0.1); + + int r = ctx.wait(); + if (r < 0) { + return r; + } + } + return 0; + } + std::string get_object_name(const std::string &oid) { return oid + ".0"; } }; -TEST_F(TestObjectPlayer, Fetch) { - std::string oid = get_temp_oid(); +template +struct TestObjectPlayerParams { + static const uint32_t max_fetch_bytes = _max_fetch_bytes; +}; + +typedef ::testing::Types, + TestObjectPlayerParams<10> > TestObjectPlayerTypes; +TYPED_TEST_CASE(TestObjectPlayer, TestObjectPlayerTypes); + +TYPED_TEST(TestObjectPlayer, Fetch) { + std::string oid = this->get_temp_oid(); - journal::Entry entry1(234, 123, create_payload(std::string(24, '1'))); - journal::Entry entry2(234, 124, create_payload(std::string(24, '1'))); + journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1'))); + journal::Entry entry2(234, 124, this->create_payload(std::string(24, '1'))); bufferlist bl; ::encode(entry1, bl); ::encode(entry2, bl); - ASSERT_EQ(0, append(get_object_name(oid), bl)); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); - journal::ObjectPlayerPtr object = create_object(oid, 14); - - C_SaferCond cond; - object->fetch(&cond); - ASSERT_LE(0, cond.wait()); + journal::ObjectPlayerPtr object = this->create_object(oid, 14); + ASSERT_LE(0, this->fetch(object)); journal::ObjectPlayer::Entries entries; object->get_entries(&entries); @@ -49,48 +92,42 @@ TEST_F(TestObjectPlayer, Fetch) { ASSERT_EQ(expected_entries, entries); } -TEST_F(TestObjectPlayer, FetchLarge) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestObjectPlayer, FetchLarge) { + std::string oid = this->get_temp_oid(); journal::Entry entry1(234, 123, - create_payload(std::string(8192 - 33, '1'))); - journal::Entry entry2(234, 124, create_payload("")); + this->create_payload(std::string(8192 - 32, '1'))); + journal::Entry entry2(234, 124, this->create_payload("")); bufferlist bl; ::encode(entry1, bl); ::encode(entry2, bl); - ASSERT_EQ(0, append(get_object_name(oid), bl)); - - journal::ObjectPlayerPtr object = create_object(oid, 12); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); - C_SaferCond cond; - object->fetch(&cond); - ASSERT_LE(0, cond.wait()); + journal::ObjectPlayerPtr object = this->create_object(oid, 12); + ASSERT_LE(0, this->fetch(object)); journal::ObjectPlayer::Entries entries; object->get_entries(&entries); - ASSERT_EQ(1U, entries.size()); + ASSERT_EQ(2U, entries.size()); - journal::ObjectPlayer::Entries expected_entries = {entry1}; + journal::ObjectPlayer::Entries expected_entries = {entry1, entry2}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestObjectPlayer, FetchDeDup) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestObjectPlayer, FetchDeDup) { + std::string oid = this->get_temp_oid(); - journal::Entry entry1(234, 123, create_payload(std::string(24, '1'))); - journal::Entry entry2(234, 123, create_payload(std::string(24, '2'))); + journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1'))); + journal::Entry entry2(234, 123, this->create_payload(std::string(24, '2'))); bufferlist bl; ::encode(entry1, bl); ::encode(entry2, bl); - ASSERT_EQ(0, append(get_object_name(oid), bl)); - - journal::ObjectPlayerPtr object = create_object(oid, 14); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); - C_SaferCond cond; - object->fetch(&cond); - ASSERT_LE(0, cond.wait()); + journal::ObjectPlayerPtr object = this->create_object(oid, 14); + ASSERT_LE(0, this->fetch(object)); journal::ObjectPlayer::Entries entries; object->get_entries(&entries); @@ -100,48 +137,32 @@ TEST_F(TestObjectPlayer, FetchDeDup) { ASSERT_EQ(expected_entries, entries); } -TEST_F(TestObjectPlayer, FetchEmpty) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestObjectPlayer, FetchEmpty) { + std::string oid = this->get_temp_oid(); bufferlist bl; - ASSERT_EQ(0, append(get_object_name(oid), bl)); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); - journal::ObjectPlayerPtr object = create_object(oid, 14); + journal::ObjectPlayerPtr object = this->create_object(oid, 14); - C_SaferCond cond; - object->fetch(&cond); - ASSERT_EQ(-ENOENT, cond.wait()); + ASSERT_EQ(-ENOENT, this->fetch(object)); ASSERT_TRUE(object->empty()); } -TEST_F(TestObjectPlayer, FetchError) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestObjectPlayer, FetchCorrupt) { + std::string oid = this->get_temp_oid(); - journal::ObjectPlayerPtr object = create_object(oid, 14); - - C_SaferCond cond; - object->fetch(&cond); - ASSERT_EQ(-ENOENT, cond.wait()); - ASSERT_TRUE(object->empty()); -} - -TEST_F(TestObjectPlayer, FetchCorrupt) { - std::string oid = get_temp_oid(); - - journal::Entry entry1(234, 123, create_payload(std::string(24, '1'))); - journal::Entry entry2(234, 124, create_payload(std::string(24, '2'))); + journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1'))); + journal::Entry entry2(234, 124, this->create_payload(std::string(24, '2'))); bufferlist bl; ::encode(entry1, bl); - ::encode(create_payload("corruption"), bl); + ::encode(this->create_payload("corruption"), bl); ::encode(entry2, bl); - ASSERT_EQ(0, append(get_object_name(oid), bl)); - - journal::ObjectPlayerPtr object = create_object(oid, 14); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); - C_SaferCond cond; - object->fetch(&cond); - ASSERT_EQ(-EBADMSG, cond.wait()); + journal::ObjectPlayerPtr object = this->create_object(oid, 14); + ASSERT_EQ(-EBADMSG, this->fetch(object)); journal::ObjectPlayer::Entries entries; object->get_entries(&entries); @@ -151,21 +172,18 @@ TEST_F(TestObjectPlayer, FetchCorrupt) { ASSERT_EQ(expected_entries, entries); } -TEST_F(TestObjectPlayer, FetchAppend) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestObjectPlayer, FetchAppend) { + std::string oid = this->get_temp_oid(); - journal::Entry entry1(234, 123, create_payload(std::string(24, '1'))); - journal::Entry entry2(234, 124, create_payload(std::string(24, '2'))); + journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1'))); + journal::Entry entry2(234, 124, this->create_payload(std::string(24, '2'))); bufferlist bl; ::encode(entry1, bl); - ASSERT_EQ(0, append(get_object_name(oid), bl)); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); - journal::ObjectPlayerPtr object = create_object(oid, 14); - - C_SaferCond cond1; - object->fetch(&cond1); - ASSERT_LE(0, cond1.wait()); + journal::ObjectPlayerPtr object = this->create_object(oid, 14); + ASSERT_LE(0, this->fetch(object)); journal::ObjectPlayer::Entries entries; object->get_entries(&entries); @@ -176,11 +194,8 @@ TEST_F(TestObjectPlayer, FetchAppend) { bl.clear(); ::encode(entry2, bl); - ASSERT_EQ(0, append(get_object_name(oid), bl)); - - C_SaferCond cond2; - object->fetch(&cond2); - ASSERT_LE(0, cond2.wait()); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); + ASSERT_LE(0, this->fetch(object)); object->get_entries(&entries); ASSERT_EQ(2U, entries.size()); @@ -189,22 +204,19 @@ TEST_F(TestObjectPlayer, FetchAppend) { ASSERT_EQ(expected_entries, entries); } -TEST_F(TestObjectPlayer, PopEntry) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestObjectPlayer, PopEntry) { + std::string oid = this->get_temp_oid(); - journal::Entry entry1(234, 123, create_payload(std::string(24, '1'))); - journal::Entry entry2(234, 124, create_payload(std::string(24, '1'))); + journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1'))); + journal::Entry entry2(234, 124, this->create_payload(std::string(24, '1'))); bufferlist bl; ::encode(entry1, bl); ::encode(entry2, bl); - ASSERT_EQ(0, append(get_object_name(oid), bl)); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); - journal::ObjectPlayerPtr object = create_object(oid, 14); - - C_SaferCond cond; - object->fetch(&cond); - ASSERT_LE(0, cond.wait()); + journal::ObjectPlayerPtr object = this->create_object(oid, 14); + ASSERT_LE(0, this->fetch(object)); journal::ObjectPlayer::Entries entries; object->get_entries(&entries); @@ -220,23 +232,23 @@ TEST_F(TestObjectPlayer, PopEntry) { ASSERT_TRUE(object->empty()); } -TEST_F(TestObjectPlayer, Watch) { - std::string oid = get_temp_oid(); - journal::ObjectPlayerPtr object = create_object(oid, 14); +TYPED_TEST(TestObjectPlayer, Watch) { + std::string oid = this->get_temp_oid(); + journal::ObjectPlayerPtr object = this->create_object(oid, 14); C_SaferCond cond1; object->watch(&cond1, 0.1); - journal::Entry entry1(234, 123, create_payload(std::string(24, '1'))); - journal::Entry entry2(234, 124, create_payload(std::string(24, '1'))); + journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1'))); + journal::Entry entry2(234, 124, this->create_payload(std::string(24, '1'))); bufferlist bl; ::encode(entry1, bl); - ASSERT_EQ(0, append(get_object_name(oid), bl)); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); ASSERT_LE(0, cond1.wait()); journal::ObjectPlayer::Entries entries; - object->get_entries(&entries); + ASSERT_EQ(0, this->watch_and_wait_for_entries(object, &entries, 1U)); ASSERT_EQ(1U, entries.size()); journal::ObjectPlayer::Entries expected_entries; @@ -248,19 +260,19 @@ TEST_F(TestObjectPlayer, Watch) { bl.clear(); ::encode(entry2, bl); - ASSERT_EQ(0, append(get_object_name(oid), bl)); + ASSERT_EQ(0, this->append(this->get_object_name(oid), bl)); ASSERT_LE(0, cond2.wait()); - object->get_entries(&entries); + ASSERT_EQ(0, this->watch_and_wait_for_entries(object, &entries, 2U)); ASSERT_EQ(2U, entries.size()); expected_entries = {entry1, entry2}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestObjectPlayer, Unwatch) { - std::string oid = get_temp_oid(); - journal::ObjectPlayerPtr object = create_object(oid, 14); +TYPED_TEST(TestObjectPlayer, Unwatch) { + std::string oid = this->get_temp_oid(); + journal::ObjectPlayerPtr object = this->create_object(oid, 14); C_SaferCond watch_ctx; object->watch(&watch_ctx, 600); From 8c1877b82fee0db1dba76252b32ff348226d41a7 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Wed, 20 Jul 2016 08:06:13 -0400 Subject: [PATCH 03/16] journal: optionally restrict maximum entry payload size Journal playback will need to read at least a full entry which was currently limited to the maximum object size. In memory constrained environment, this new optional limit will set a fix upper bound on memory usage. Signed-off-by: Jason Dillaman --- src/journal/Journaler.cc | 8 +++++++- src/journal/Settings.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/journal/Journaler.cc b/src/journal/Journaler.cc index 05d80693dc861..27cc33845c01f 100644 --- a/src/journal/Journaler.cc +++ b/src/journal/Journaler.cc @@ -395,7 +395,13 @@ void Journaler::stop_append(Context *on_safe) { } uint64_t Journaler::get_max_append_size() const { - return m_metadata->get_object_size() - Entry::get_fixed_size(); + uint64_t max_payload_size = m_metadata->get_object_size() - + Entry::get_fixed_size(); + if (m_metadata->get_settings().max_payload_bytes > 0) { + max_payload_size = MIN(max_payload_size, + m_metadata->get_settings().max_payload_bytes); + } + return max_payload_size; } Future Journaler::append(uint64_t tag_tid, const bufferlist &payload_bl) { diff --git a/src/journal/Settings.h b/src/journal/Settings.h index d8f5e7469301a..603770cbaf65c 100644 --- a/src/journal/Settings.h +++ b/src/journal/Settings.h @@ -11,6 +11,7 @@ namespace journal { struct Settings { double commit_interval = 5; ///< commit position throttle (in secs) uint64_t max_fetch_bytes = 0; ///< 0 implies no limit + uint64_t max_payload_bytes = 0; ///< 0 implies object size limit }; } // namespace journal From 2666d366645b22a5db2a2bcbfce466726bf0b3c0 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 18 Jul 2016 15:15:58 -0400 Subject: [PATCH 04/16] journal: replay should only read from a single object set Previously it was prefetching up to 2 object sets worth of journal data objects which consumed too much memory. Signed-off-by: Jason Dillaman --- src/journal/JournalPlayer.cc | 87 +++++++++++------------------------- src/journal/JournalPlayer.h | 5 +-- 2 files changed, 27 insertions(+), 65 deletions(-) diff --git a/src/journal/JournalPlayer.cc b/src/journal/JournalPlayer.cc index a06ca620f7cd0..184db8eeb668a 100644 --- a/src/journal/JournalPlayer.cc +++ b/src/journal/JournalPlayer.cc @@ -107,9 +107,7 @@ void JournalPlayer::prefetch() { splay_offset_to_objects[position.first] = position.second.object_number; } - // prefetch the active object for each splay offset (and the following object) - uint64_t active_set = m_journal_metadata->get_active_set(); - uint64_t max_object_number = (splay_width * (active_set + 1)) - 1; + // prefetch the active object for each splay offset std::set prefetch_object_numbers; for (uint8_t splay_offset = 0; splay_offset < splay_width; ++splay_offset) { uint64_t object_number = splay_offset; @@ -118,9 +116,6 @@ void JournalPlayer::prefetch() { } prefetch_object_numbers.insert(object_number); - if (object_number + splay_width <= max_object_number) { - prefetch_object_numbers.insert(object_number + splay_width); - } } ldout(m_cct, 10) << __func__ << ": prefetching " @@ -156,7 +151,7 @@ void JournalPlayer::shut_down(Context *on_finish) { ObjectPlayerPtr object_player = get_object_player(); switch (m_watch_step) { case WATCH_STEP_FETCH_FIRST: - object_player = m_object_players.begin()->second.begin()->second; + object_player = m_object_players.begin()->second; // fallthrough case WATCH_STEP_FETCH_CURRENT: object_player->unwatch(); @@ -266,12 +261,11 @@ int JournalPlayer::process_prefetch(uint64_t object_number) { bool prefetch_complete = false; assert(m_object_players.count(splay_offset) == 1); - ObjectPlayers &object_players = m_object_players[splay_offset]; + ObjectPlayerPtr object_player = m_object_players[splay_offset]; // prefetch in-order since a newer splay object could prefetch first while (m_fetch_object_numbers.count( - object_players.begin()->second->get_object_number()) == 0) { - ObjectPlayerPtr object_player = object_players.begin()->second; + object_player->get_object_number()) == 0) { uint64_t player_object_number = object_player->get_object_number(); // skip past known committed records @@ -492,28 +486,23 @@ void JournalPlayer::prune_tag(uint64_t tag_tid) { m_prune_tag_tid = tag_tid; } - for (auto &players : m_object_players) { - for (auto player_pair : players.second) { - ObjectPlayerPtr object_player = player_pair.second; - ldout(m_cct, 15) << __func__ << ": checking " << object_player->get_oid() - << dendl; - while (!object_player->empty()) { - Entry entry; - object_player->front(&entry); - if (entry.get_tag_tid() == tag_tid) { - ldout(m_cct, 20) << __func__ << ": pruned " << entry << dendl; - object_player->pop_front(); - } else { - break; - } + for (auto &player_pair : m_object_players) { + ObjectPlayerPtr object_player(player_pair.second); + ldout(m_cct, 15) << __func__ << ": checking " << object_player->get_oid() + << dendl; + while (!object_player->empty()) { + Entry entry; + object_player->front(&entry); + if (entry.get_tag_tid() == tag_tid) { + ldout(m_cct, 20) << __func__ << ": pruned " << entry << dendl; + object_player->pop_front(); + } else { + break; } } - // trim any empty players to prefetch the next available object - ObjectPlayers object_players(players.second); - for (auto player_pair : object_players) { - remove_empty_object_player(player_pair.second); - } + // trim empty player to prefetch the next available object + remove_empty_object_player(player_pair.second); } } @@ -531,23 +520,15 @@ void JournalPlayer::prune_active_tag(const boost::optional& tag_tid) { prune_tag(active_tag_tid); } -const JournalPlayer::ObjectPlayers &JournalPlayer::get_object_players() const { +ObjectPlayerPtr JournalPlayer::get_object_player() const { assert(m_lock.is_locked()); SplayedObjectPlayers::const_iterator it = m_object_players.find( m_splay_offset); assert(it != m_object_players.end()); - return it->second; } -ObjectPlayerPtr JournalPlayer::get_object_player() const { - assert(m_lock.is_locked()); - - const ObjectPlayers &object_players = get_object_players(); - return object_players.begin()->second; -} - ObjectPlayerPtr JournalPlayer::get_object_player(uint64_t object_number) const { assert(m_lock.is_locked()); @@ -556,17 +537,9 @@ ObjectPlayerPtr JournalPlayer::get_object_player(uint64_t object_number) const { auto splay_it = m_object_players.find(splay_offset); assert(splay_it != m_object_players.end()); - const ObjectPlayers &object_players = splay_it->second; - auto player_it = object_players.find(object_number); - assert(player_it != object_players.end()); - return player_it->second; -} - -ObjectPlayerPtr JournalPlayer::get_next_set_object_player() const { - assert(m_lock.is_locked()); - - const ObjectPlayers &object_players = get_object_players(); - return object_players.rbegin()->second; + ObjectPlayerPtr object_player = splay_it->second; + assert(object_player->get_object_number() == object_number); + return object_player; } void JournalPlayer::advance_splay_object() { @@ -599,16 +572,8 @@ bool JournalPlayer::remove_empty_object_player(const ObjectPlayerPtr &player) { m_watch_prune_active_tag = false; m_watch_step = WATCH_STEP_FETCH_CURRENT; - ObjectPlayers &object_players = m_object_players[ - player->get_object_number() % splay_width]; - assert(!object_players.empty()); - - uint64_t next_object_num = object_players.rbegin()->first + splay_width; - uint64_t next_object_set = next_object_num / splay_width; - if (next_object_set <= active_set) { - fetch(next_object_num); - } - object_players.erase(player->get_object_number()); + uint64_t next_object_num = player->get_object_number() + splay_width; + fetch(next_object_num); return true; } @@ -628,7 +593,7 @@ void JournalPlayer::fetch(uint64_t object_num) { m_journal_metadata->get_settings().max_fetch_bytes)); uint8_t splay_width = m_journal_metadata->get_splay_width(); - m_object_players[object_num % splay_width][object_num] = object_player; + m_object_players[object_num % splay_width] = object_player; object_player->fetch(fetch_ctx); } @@ -699,7 +664,7 @@ void JournalPlayer::schedule_watch() { } break; case WATCH_STEP_FETCH_FIRST: - object_player = m_object_players.begin()->second.begin()->second; + object_player = m_object_players.begin()->second; watch_interval = 0; break; default: diff --git a/src/journal/JournalPlayer.h b/src/journal/JournalPlayer.h index f50258296c9fb..8ede43c57040a 100644 --- a/src/journal/JournalPlayer.h +++ b/src/journal/JournalPlayer.h @@ -42,8 +42,7 @@ class JournalPlayer { private: typedef std::set PrefetchSplayOffsets; - typedef std::map ObjectPlayers; - typedef std::map SplayedObjectPlayers; + typedef std::map SplayedObjectPlayers; typedef std::map SplayedObjectPositions; typedef std::set ObjectNumbers; @@ -129,10 +128,8 @@ class JournalPlayer { void prune_tag(uint64_t tag_tid); void prune_active_tag(const boost::optional& tag_tid); - const ObjectPlayers &get_object_players() const; ObjectPlayerPtr get_object_player() const; ObjectPlayerPtr get_object_player(uint64_t object_number) const; - ObjectPlayerPtr get_next_set_object_player() const; bool remove_empty_object_player(const ObjectPlayerPtr &object_player); void process_state(uint64_t object_number, int r); From 28d5ca16cbcb445f985469413b2a8a3048ab66b7 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 18 Jul 2016 15:34:53 -0400 Subject: [PATCH 05/16] journal: support streaming entry playback Now that it's possible for the ObjectPlayer to only read a partial subset of available entries, the JournalPlayer needs to detect that more entries might be available. Signed-off-by: Jason Dillaman --- src/journal/JournalPlayer.cc | 170 +++--- src/journal/JournalPlayer.h | 9 +- src/journal/ObjectPlayer.cc | 15 +- src/journal/ObjectPlayer.h | 3 + src/test/journal/RadosTestFixture.cc | 3 +- src/test/journal/RadosTestFixture.h | 3 +- src/test/journal/test_JournalPlayer.cc | 748 +++++++++++++------------ src/test/journal/test_ObjectPlayer.cc | 2 +- 8 files changed, 505 insertions(+), 448 deletions(-) diff --git a/src/journal/JournalPlayer.cc b/src/journal/JournalPlayer.cc index 184db8eeb668a..30d6cdd36cfa0 100644 --- a/src/journal/JournalPlayer.cc +++ b/src/journal/JournalPlayer.cc @@ -53,8 +53,7 @@ JournalPlayer::JournalPlayer(librados::IoCtx &ioctx, : m_cct(NULL), m_object_oid_prefix(object_oid_prefix), m_journal_metadata(journal_metadata), m_replay_handler(replay_handler), m_lock("JournalPlayer::m_lock"), m_state(STATE_INIT), m_splay_offset(0), - m_watch_enabled(false), m_watch_scheduled(false), m_watch_interval(0), - m_commit_object(0) { + m_watch_enabled(false), m_watch_scheduled(false), m_watch_interval(0) { m_replay_handler->get(); m_ioctx.dup(ioctx); m_cct = reinterpret_cast(m_ioctx.cct()); @@ -69,8 +68,9 @@ JournalPlayer::JournalPlayer(librados::IoCtx &ioctx, uint8_t splay_width = m_journal_metadata->get_splay_width(); auto &active_position = commit_position.object_positions.front(); m_active_tag_tid = active_position.tag_tid; - m_commit_object = active_position.object_number; - m_splay_offset = m_commit_object % splay_width; + m_commit_position_valid = true; + m_commit_position = active_position; + m_splay_offset = active_position.object_number % splay_width; for (auto &position : commit_position.object_positions) { uint8_t splay_offset = position.object_number % splay_width; m_commit_positions[splay_offset] = position; @@ -94,6 +94,7 @@ void JournalPlayer::prefetch() { assert(m_state == STATE_INIT); m_state = STATE_PREFETCH; + m_active_set = m_journal_metadata->get_active_set(); uint8_t splay_width = m_journal_metadata->get_splay_width(); for (uint8_t splay_offset = 0; splay_offset < splay_width; ++splay_offset) { m_prefetch_splay_offsets.insert(splay_offset); @@ -177,12 +178,7 @@ bool JournalPlayer::try_pop_front(Entry *entry, uint64_t *commit_tid) { if (!is_object_set_ready()) { m_handler_notified = false; } else { - if (!m_watch_enabled) { - notify_complete(0); - } else if (!m_watch_scheduled) { - m_handler_notified = false; - schedule_watch(); - } + refetch(true); } return false; } @@ -264,10 +260,7 @@ int JournalPlayer::process_prefetch(uint64_t object_number) { ObjectPlayerPtr object_player = m_object_players[splay_offset]; // prefetch in-order since a newer splay object could prefetch first - while (m_fetch_object_numbers.count( - object_player->get_object_number()) == 0) { - uint64_t player_object_number = object_player->get_object_number(); - + if (m_fetch_object_numbers.count(object_player->get_object_number()) == 0) { // skip past known committed records if (m_commit_positions.count(splay_offset) != 0 && !object_player->empty()) { @@ -296,20 +289,6 @@ int JournalPlayer::process_prefetch(uint64_t object_number) { object_player->pop_front(); } - // if this object contains the commit position, our read should start with - // the next consistent journal entry in the sequence - if (player_object_number == m_commit_object) { - if (object_player->empty()) { - advance_splay_object(); - } else { - Entry entry; - object_player->front(&entry); - if (entry.get_tag_tid() == position.tag_tid) { - advance_splay_object(); - } - } - } - // do not search for commit position for this object // if we've already seen it if (found_commit) { @@ -318,9 +297,14 @@ int JournalPlayer::process_prefetch(uint64_t object_number) { } // if the object is empty, pre-fetch the next splay object - if (!remove_empty_object_player(object_player)) { + if (object_player->empty() && object_player->refetch_required()) { + ldout(m_cct, 10) << "refetching potentially partially decoded object" + << dendl; + object_player->clear_refetch_required(); + fetch(object_player); + } else if (!remove_empty_object_player(object_player)) { + ldout(m_cct, 10) << "prefetch of object complete" << dendl; prefetch_complete = true; - break; } } @@ -333,17 +317,32 @@ int JournalPlayer::process_prefetch(uint64_t object_number) { return 0; } + ldout(m_cct, 10) << "switching to playback mode" << dendl; m_state = STATE_PLAYBACK; + + // if we have a valid commit position, our read should start with + // the next consistent journal entry in the sequence + if (m_commit_position_valid) { + splay_offset = m_commit_position.object_number % splay_width; + object_player = m_object_players[splay_offset]; + + if (object_player->empty()) { + if (!object_player->refetch_required()) { + advance_splay_object(); + } + } else { + Entry entry; + object_player->front(&entry); + if (entry.get_tag_tid() == m_commit_position.tag_tid) { + advance_splay_object(); + } + } + } + if (verify_playback_ready()) { notify_entries_available(); } else if (is_object_set_ready()) { - if (m_watch_enabled) { - schedule_watch(); - } else { - ldout(m_cct, 10) << __func__ << ": no uncommitted entries available" - << dendl; - notify_complete(0); - } + refetch(false); } return 0; } @@ -355,17 +354,7 @@ int JournalPlayer::process_playback(uint64_t object_number) { if (verify_playback_ready()) { notify_entries_available(); } else if (is_object_set_ready()) { - if (m_watch_enabled) { - schedule_watch(); - } else { - ObjectPlayerPtr object_player = get_object_player(); - uint8_t splay_width = m_journal_metadata->get_splay_width(); - uint64_t active_set = m_journal_metadata->get_active_set(); - uint64_t object_set = object_player->get_object_number() / splay_width; - if (object_set == active_set) { - notify_complete(0); - } - } + refetch(false); } return 0; } @@ -373,8 +362,10 @@ int JournalPlayer::process_playback(uint64_t object_number) { bool JournalPlayer::is_object_set_ready() const { assert(m_lock.is_locked()); if (m_watch_scheduled || !m_fetch_object_numbers.empty()) { + ldout(m_cct, 20) << __func__ << ": waiting for in-flight fetch" << dendl; return false; } + return true; } @@ -449,13 +440,6 @@ bool JournalPlayer::verify_playback_ready() { } else if (m_prune_tag_tid && *m_prune_tag_tid == *m_active_tag_tid) { ldout(m_cct, 10) << __func__ << ": no more entries" << dendl; return false; - } else if (!m_watch_enabled) { - // current playback position is empty so this tag is done - ldout(m_cct, 10) << __func__ << ": no more in-sequence entries: " - << "object_num=" << object_num << ", " - << "active_tag=" << *m_active_tag_tid << dendl; - prune_active_tag(boost::none); - continue; } else if (m_watch_enabled && m_watch_prune_active_tag) { // detected current tag is now longer active and we have re-read the // current object but it's still empty, so this tag is done @@ -464,12 +448,24 @@ bool JournalPlayer::verify_playback_ready() { << "active_tag " << *m_active_tag_tid << dendl; prune_active_tag(boost::none); continue; - } else if (m_watch_enabled && object_player->refetch_required()) { + } else if (object_player->refetch_required()) { // if the active object requires a refetch, don't proceed looking for a // new tag before this process completes ldout(m_cct, 10) << __func__ << ": refetch required: " << "object_num=" << object_num << dendl; return false; + } else if (!m_watch_enabled) { + // current playback position is empty so this tag is done + ldout(m_cct, 10) << __func__ << ": no more in-sequence entries: " + << "object_num=" << object_num << ", " + << "active_tag=" << *m_active_tag_tid << dendl; + prune_active_tag(boost::none); + continue; + } else if (!m_watch_scheduled) { + // no more entries and we don't have an active watch in-progress + ldout(m_cct, 10) << __func__ << ": no more entries -- watch required" + << dendl; + return false; } } } @@ -560,10 +556,18 @@ bool JournalPlayer::remove_empty_object_player(const ObjectPlayerPtr &player) { uint64_t active_set = m_journal_metadata->get_active_set(); if (!player->empty() || object_set == active_set) { return false; - } else if (m_watch_enabled && player->refetch_required()) { + } else if (player->refetch_required()) { ldout(m_cct, 20) << __func__ << ": " << player->get_oid() << " requires " << "a refetch" << dendl; return false; + } else if (m_active_set != active_set) { + ldout(m_cct, 20) << __func__ << ": new active set detected, all players " + << "require refetch" << dendl; + m_active_set = active_set; + for (auto &pair : m_object_players) { + pair.second->set_refetch_required(); + } + return false; } ldout(m_cct, 15) << __func__ << ": " << player->get_oid() << " empty" @@ -580,13 +584,6 @@ bool JournalPlayer::remove_empty_object_player(const ObjectPlayerPtr &player) { void JournalPlayer::fetch(uint64_t object_num) { assert(m_lock.is_locked()); - std::string oid = utils::get_object_name(m_object_oid_prefix, object_num); - - assert(m_fetch_object_numbers.count(object_num) == 0); - m_fetch_object_numbers.insert(object_num); - - ldout(m_cct, 10) << __func__ << ": " << oid << dendl; - C_Fetch *fetch_ctx = new C_Fetch(this, object_num); ObjectPlayerPtr object_player(new ObjectPlayer( m_ioctx, m_object_oid_prefix, object_num, m_journal_metadata->get_timer(), m_journal_metadata->get_timer_lock(), m_journal_metadata->get_order(), @@ -594,6 +591,20 @@ void JournalPlayer::fetch(uint64_t object_num) { uint8_t splay_width = m_journal_metadata->get_splay_width(); m_object_players[object_num % splay_width] = object_player; + fetch(object_player); +} + +void JournalPlayer::fetch(const ObjectPlayerPtr &object_player) { + assert(m_lock.is_locked()); + + uint64_t object_num = object_player->get_object_number(); + std::string oid = utils::get_object_name(m_object_oid_prefix, object_num); + assert(m_fetch_object_numbers.count(object_num) == 0); + m_fetch_object_numbers.insert(object_num); + + ldout(m_cct, 10) << __func__ << ": " << oid << dendl; + C_Fetch *fetch_ctx = new C_Fetch(this, object_num); + object_player->fetch(fetch_ctx); } @@ -610,9 +621,6 @@ void JournalPlayer::handle_fetched(uint64_t object_num, int r) { return; } - if (r == -ENOENT) { - r = 0; - } if (r == 0) { ObjectPlayerPtr object_player = get_object_player(object_num); remove_empty_object_player(object_player); @@ -620,7 +628,28 @@ void JournalPlayer::handle_fetched(uint64_t object_num, int r) { process_state(object_num, r); } -void JournalPlayer::schedule_watch() { +void JournalPlayer::refetch(bool immediate) { + ldout(m_cct, 10) << __func__ << dendl; + assert(m_lock.is_locked()); + m_handler_notified = false; + + // if watching the object, handle the periodic re-fetch + if (m_watch_enabled) { + schedule_watch(immediate); + return; + } + + ObjectPlayerPtr object_player = get_object_player(); + if (object_player->refetch_required()) { + object_player->clear_refetch_required(); + fetch(object_player); + return; + } + + notify_complete(0); +} + +void JournalPlayer::schedule_watch(bool immediate) { ldout(m_cct, 10) << __func__ << dendl; assert(m_lock.is_locked()); if (m_watch_scheduled) { @@ -654,7 +683,8 @@ void JournalPlayer::schedule_watch() { uint8_t splay_width = m_journal_metadata->get_splay_width(); uint64_t active_set = m_journal_metadata->get_active_set(); uint64_t object_set = object_player->get_object_number() / splay_width; - if (object_set < active_set && object_player->refetch_required()) { + if (immediate || + (object_set < active_set && object_player->refetch_required())) { ldout(m_cct, 20) << __func__ << ": refetching " << object_player->get_oid() << dendl; @@ -728,7 +758,7 @@ void JournalPlayer::handle_watch_assert_active(int r) { m_watch_step = WATCH_STEP_FETCH_CURRENT; if (!m_shut_down && m_watch_enabled) { - schedule_watch(); + schedule_watch(false); } m_async_op_tracker.finish_op(); } diff --git a/src/journal/JournalPlayer.h b/src/journal/JournalPlayer.h index 8ede43c57040a..690eccdd47191 100644 --- a/src/journal/JournalPlayer.h +++ b/src/journal/JournalPlayer.h @@ -115,8 +115,11 @@ class JournalPlayer { PrefetchSplayOffsets m_prefetch_splay_offsets; SplayedObjectPlayers m_object_players; - uint64_t m_commit_object; + + bool m_commit_position_valid = false; + ObjectPosition m_commit_position; SplayedObjectPositions m_commit_positions; + uint64_t m_active_set; boost::optional m_active_tag_tid = boost::none; boost::optional m_prune_tag_tid = boost::none; @@ -137,9 +140,11 @@ class JournalPlayer { int process_playback(uint64_t object_number); void fetch(uint64_t object_num); + void fetch(const ObjectPlayerPtr &object_player); void handle_fetched(uint64_t object_num, int r); + void refetch(bool immediate); - void schedule_watch(); + void schedule_watch(bool immediate); void handle_watch(uint64_t object_num, int r); void handle_watch_assert_active(int r); diff --git a/src/journal/ObjectPlayer.cc b/src/journal/ObjectPlayer.cc index 156da728e9713..31939e9bb167f 100644 --- a/src/journal/ObjectPlayer.cc +++ b/src/journal/ObjectPlayer.cc @@ -110,17 +110,19 @@ int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl, << bl.length() << dendl; *refetch = false; - if (r < 0) { + if (r == -ENOENT) { + return 0; + } else if (r < 0) { return r; - } - if (bl.length() == 0) { - return -ENOENT; + } else if (bl.length() == 0) { + return 0; } Mutex::Locker locker(m_lock); assert(m_fetch_in_progress); m_read_off += bl.length(); m_read_bl.append(bl); + m_refetch_required = true; bool full_fetch = (m_max_fetch_bytes == 2U << m_order); bool partial_entry = false; @@ -268,11 +270,6 @@ void ObjectPlayer::handle_watch_fetched(int r) { Context *watch_ctx = nullptr; { Mutex::Locker timer_locker(m_timer_lock); - if (r == -ENOENT) { - r = 0; - } else { - m_refetch_required = true; - } std::swap(watch_ctx, m_watch_ctx); if (m_unwatched) { diff --git a/src/journal/ObjectPlayer.h b/src/journal/ObjectPlayer.h index 74ec94bd31b7a..a9d2d9e985ec4 100644 --- a/src/journal/ObjectPlayer.h +++ b/src/journal/ObjectPlayer.h @@ -65,6 +65,9 @@ class ObjectPlayer : public RefCountedObject { inline bool refetch_required() const { return m_refetch_required; } + inline void set_refetch_required() { + m_refetch_required = true; + } inline void clear_refetch_required() { m_refetch_required = false; } diff --git a/src/test/journal/RadosTestFixture.cc b/src/test/journal/RadosTestFixture.cc index 40ff485203aff..dba3ec6a244a1 100644 --- a/src/test/journal/RadosTestFixture.cc +++ b/src/test/journal/RadosTestFixture.cc @@ -68,9 +68,10 @@ int RadosTestFixture::create(const std::string &oid, uint8_t order, journal::JournalMetadataPtr RadosTestFixture::create_metadata( const std::string &oid, const std::string &client_id, - double commit_interval) { + double commit_interval, uint64_t max_fetch_bytes) { journal::Settings settings; settings.commit_interval = commit_interval; + settings.max_fetch_bytes = max_fetch_bytes; journal::JournalMetadataPtr metadata(new journal::JournalMetadata( m_work_queue, m_timer, &m_timer_lock, m_ioctx, oid, client_id, settings)); diff --git a/src/test/journal/RadosTestFixture.h b/src/test/journal/RadosTestFixture.h index bbd6dde608ddf..4ea22e7cefcf9 100644 --- a/src/test/journal/RadosTestFixture.h +++ b/src/test/journal/RadosTestFixture.h @@ -25,7 +25,8 @@ class RadosTestFixture : public ::testing::Test { uint8_t splay_width = 2); journal::JournalMetadataPtr create_metadata(const std::string &oid, const std::string &client_id = "client", - double commit_internal = 0.1); + double commit_internal = 0.1, + uint64_t max_fetch_bytes = 0); int append(const std::string &oid, const bufferlist &bl); int client_register(const std::string &oid, const std::string &id = "client", diff --git a/src/test/journal/test_JournalPlayer.cc b/src/test/journal/test_JournalPlayer.cc index 000f13ba4d87e..03255d06b7662 100644 --- a/src/test/journal/test_JournalPlayer.cc +++ b/src/test/journal/test_JournalPlayer.cc @@ -13,10 +13,14 @@ #include #include +typedef std::list Entries; + +template class TestJournalPlayer : public RadosTestFixture { public: typedef std::list JournalPlayers; - typedef std::list Entries; + + static const uint64_t max_fetch_bytes = T::max_fetch_bytes; struct ReplayHandler : public journal::ReplayHandler { Mutex lock; @@ -54,19 +58,25 @@ class TestJournalPlayer : public RadosTestFixture { RadosTestFixture::TearDown(); } + journal::JournalMetadataPtr create_metadata(const std::string &oid) { + return RadosTestFixture::create_metadata(oid, "client", 0.1, + max_fetch_bytes); + } + int client_commit(const std::string &oid, journal::JournalPlayer::ObjectSetPosition position) { return RadosTestFixture::client_commit(oid, "client", position); } journal::Entry create_entry(uint64_t tag_tid, uint64_t entry_tid) { + std::string payload(128, '0'); bufferlist payload_bl; - payload_bl.append("playload"); + payload_bl.append(payload); return journal::Entry(tag_tid, entry_tid, payload_bl); } journal::JournalPlayer *create_player(const std::string &oid, - const journal::JournalMetadataPtr &metadata) { + const journal::JournalMetadataPtr &metadata) { journal::JournalPlayer *player(new journal::JournalPlayer( m_ioctx, oid + ".", metadata, &m_replay_hander)); m_players.push_back(player); @@ -100,12 +110,12 @@ class TestJournalPlayer : public RadosTestFixture { } bool wait_for_complete(journal::JournalPlayer *player) { - journal::Entry entry; - uint64_t commit_tid; - player->try_pop_front(&entry, &commit_tid); - Mutex::Locker locker(m_replay_hander.lock); while (!m_replay_hander.complete) { + journal::Entry entry; + uint64_t commit_tid; + player->try_pop_front(&entry, &commit_tid); + if (m_replay_hander.cond.WaitInterval( reinterpret_cast(m_ioctx.cct()), m_replay_hander.lock, utime_t(10, 0)) != 0) { @@ -127,44 +137,54 @@ class TestJournalPlayer : public RadosTestFixture { ReplayHandler m_replay_hander; }; -TEST_F(TestJournalPlayer, Prefetch) { - std::string oid = get_temp_oid(); +template +class TestJournalPlayerParams { +public: + static const uint64_t max_fetch_bytes = _max_fetch_bytes; +}; + +typedef ::testing::Types, + TestJournalPlayerParams<16> > TestJournalPlayerTypes; +TYPED_TEST_CASE(TestJournalPlayer, TestJournalPlayerTypes); + +TYPED_TEST(TestJournalPlayer, Prefetch) { + std::string oid = this->get_temp_oid(); journal::JournalPlayer::ObjectPositions positions; positions = { cls::journal::ObjectPosition(0, 234, 122) }; cls::journal::ObjectSetPosition commit_position(positions); - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 234, 122)); - ASSERT_EQ(0, write_entry(oid, 1, 234, 123)); - ASSERT_EQ(0, write_entry(oid, 0, 234, 124)); - ASSERT_EQ(0, write_entry(oid, 1, 234, 125)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 125)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 3, &entries)); - ASSERT_TRUE(wait_for_complete(player)); + ASSERT_TRUE(this->wait_for_entries(player, 3, &entries)); + ASSERT_TRUE(this->wait_for_complete(player)); Entries expected_entries; expected_entries = { - create_entry(234, 123), - create_entry(234, 124), - create_entry(234, 125)}; + this->create_entry(234, 123), + this->create_entry(234, 124), + this->create_entry(234, 125)}; ASSERT_EQ(expected_entries, entries); uint64_t last_tid; @@ -172,8 +192,8 @@ TEST_F(TestJournalPlayer, Prefetch) { ASSERT_EQ(125U, last_tid); } -TEST_F(TestJournalPlayer, PrefetchSkip) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchSkip) { + std::string oid = this->get_temp_oid(); journal::JournalPlayer::ObjectPositions positions; positions = { @@ -181,73 +201,73 @@ TEST_F(TestJournalPlayer, PrefetchSkip) { cls::journal::ObjectPosition(1, 234, 124) }; cls::journal::ObjectSetPosition commit_position(positions); - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 234, 122)); - ASSERT_EQ(0, write_entry(oid, 1, 234, 123)); - ASSERT_EQ(0, write_entry(oid, 0, 234, 124)); - ASSERT_EQ(0, write_entry(oid, 1, 234, 125)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 125)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 0, &entries)); - ASSERT_TRUE(wait_for_complete(player)); + ASSERT_TRUE(this->wait_for_entries(player, 0, &entries)); + ASSERT_TRUE(this->wait_for_complete(player)); uint64_t last_tid; ASSERT_TRUE(metadata->get_last_allocated_entry_tid(234, &last_tid)); ASSERT_EQ(125U, last_tid); } -TEST_F(TestJournalPlayer, PrefetchWithoutCommit) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchWithoutCommit) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 234, 122)); - ASSERT_EQ(0, write_entry(oid, 1, 234, 123)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 2, &entries)); - ASSERT_TRUE(wait_for_complete(player)); + ASSERT_TRUE(this->wait_for_entries(player, 2, &entries)); + ASSERT_TRUE(this->wait_for_complete(player)); Entries expected_entries; expected_entries = { - create_entry(234, 122), - create_entry(234, 123)}; + this->create_entry(234, 122), + this->create_entry(234, 123)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, PrefetchMultipleTags) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchMultipleTags) { + std::string oid = this->get_temp_oid(); journal::JournalPlayer::ObjectPositions positions; positions = { @@ -256,32 +276,32 @@ TEST_F(TestJournalPlayer, PrefetchMultipleTags) { cls::journal::ObjectPosition(0, 234, 120)}; cls::journal::ObjectSetPosition commit_position(positions); - ASSERT_EQ(0, create(oid, 14, 3)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid, 14, 3)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 234, 120)); - ASSERT_EQ(0, write_entry(oid, 1, 234, 121)); - ASSERT_EQ(0, write_entry(oid, 2, 234, 122)); - ASSERT_EQ(0, write_entry(oid, 0, 234, 123)); - ASSERT_EQ(0, write_entry(oid, 1, 234, 124)); - ASSERT_EQ(0, write_entry(oid, 0, 236, 0)); // new tag allocated + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 120)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 121)); + ASSERT_EQ(0, this->write_entry(oid, 2, 234, 122)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 123)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 124)); + ASSERT_EQ(0, this->write_entry(oid, 0, 236, 0)); // new tag allocated player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 3, &entries)); - ASSERT_TRUE(wait_for_complete(player)); + ASSERT_TRUE(this->wait_for_entries(player, 3, &entries)); + ASSERT_TRUE(this->wait_for_complete(player)); uint64_t last_tid; ASSERT_TRUE(metadata->get_last_allocated_entry_tid(234, &last_tid)); @@ -290,53 +310,53 @@ TEST_F(TestJournalPlayer, PrefetchMultipleTags) { ASSERT_EQ(0U, last_tid); } -TEST_F(TestJournalPlayer, PrefetchCorruptSequence) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchCorruptSequence) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 234, 120)); - ASSERT_EQ(0, write_entry(oid, 1, 234, 121)); - ASSERT_EQ(0, write_entry(oid, 0, 234, 124)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 120)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 121)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 2, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 2, &entries)); journal::Entry entry; uint64_t commit_tid; ASSERT_FALSE(player->try_pop_front(&entry, &commit_tid)); - ASSERT_TRUE(wait_for_complete(player)); - ASSERT_EQ(-ENOMSG, m_replay_hander.complete_result); + ASSERT_TRUE(this->wait_for_complete(player)); + ASSERT_EQ(-ENOMSG, this->m_replay_hander.complete_result); } -TEST_F(TestJournalPlayer, PrefetchMissingSequence) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchMissingSequence) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid, 14, 4)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid, 14, 4)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); @@ -344,49 +364,49 @@ TEST_F(TestJournalPlayer, PrefetchMissingSequence) { }; ASSERT_EQ(0, metadata->set_active_set(1)); - ASSERT_EQ(0, write_entry(oid, 0, 2, 852)); - ASSERT_EQ(0, write_entry(oid, 0, 2, 856)); - ASSERT_EQ(0, write_entry(oid, 0, 2, 860)); - ASSERT_EQ(0, write_entry(oid, 1, 2, 853)); - ASSERT_EQ(0, write_entry(oid, 1, 2, 857)); - ASSERT_EQ(0, write_entry(oid, 5, 2, 861)); - ASSERT_EQ(0, write_entry(oid, 2, 2, 854)); - ASSERT_EQ(0, write_entry(oid, 0, 3, 0)); - ASSERT_EQ(0, write_entry(oid, 5, 3, 1)); - ASSERT_EQ(0, write_entry(oid, 2, 3, 2)); - ASSERT_EQ(0, write_entry(oid, 3, 3, 3)); + ASSERT_EQ(0, this->write_entry(oid, 0, 2, 852)); + ASSERT_EQ(0, this->write_entry(oid, 0, 2, 856)); + ASSERT_EQ(0, this->write_entry(oid, 0, 2, 860)); + ASSERT_EQ(0, this->write_entry(oid, 1, 2, 853)); + ASSERT_EQ(0, this->write_entry(oid, 1, 2, 857)); + ASSERT_EQ(0, this->write_entry(oid, 5, 2, 861)); + ASSERT_EQ(0, this->write_entry(oid, 2, 2, 854)); + ASSERT_EQ(0, this->write_entry(oid, 0, 3, 0)); + ASSERT_EQ(0, this->write_entry(oid, 5, 3, 1)); + ASSERT_EQ(0, this->write_entry(oid, 2, 3, 2)); + ASSERT_EQ(0, this->write_entry(oid, 3, 3, 3)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 7, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 7, &entries)); Entries expected_entries = { - create_entry(2, 852), - create_entry(2, 853), - create_entry(2, 854), - create_entry(3, 0), - create_entry(3, 1), - create_entry(3, 2), - create_entry(3, 3)}; + this->create_entry(2, 852), + this->create_entry(2, 853), + this->create_entry(2, 854), + this->create_entry(3, 0), + this->create_entry(3, 1), + this->create_entry(3, 2), + this->create_entry(3, 3)}; ASSERT_EQ(expected_entries, entries); - ASSERT_TRUE(wait_for_complete(player)); - ASSERT_EQ(0, m_replay_hander.complete_result); + ASSERT_TRUE(this->wait_for_complete(player)); + ASSERT_EQ(0, this->m_replay_hander.complete_result); } -TEST_F(TestJournalPlayer, PrefetchLargeMissingSequence) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchLargeMissingSequence) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); @@ -394,211 +414,211 @@ TEST_F(TestJournalPlayer, PrefetchLargeMissingSequence) { }; ASSERT_EQ(0, metadata->set_active_set(2)); - ASSERT_EQ(0, write_entry(oid, 0, 0, 0)); - ASSERT_EQ(0, write_entry(oid, 1, 0, 1)); - ASSERT_EQ(0, write_entry(oid, 3, 0, 3)); - ASSERT_EQ(0, write_entry(oid, 4, 1, 0)); + ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1)); + ASSERT_EQ(0, this->write_entry(oid, 3, 0, 3)); + ASSERT_EQ(0, this->write_entry(oid, 4, 1, 0)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 3, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 3, &entries)); Entries expected_entries = { - create_entry(0, 0), - create_entry(0, 1), - create_entry(1, 0)}; + this->create_entry(0, 0), + this->create_entry(0, 1), + this->create_entry(1, 0)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, PrefetchBlockedNewTag) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchBlockedNewTag) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 0, 0)); - ASSERT_EQ(0, write_entry(oid, 1, 0, 1)); - ASSERT_EQ(0, write_entry(oid, 0, 0, 2)); - ASSERT_EQ(0, write_entry(oid, 0, 0, 4)); - ASSERT_EQ(0, write_entry(oid, 0, 1, 0)); + ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1)); + ASSERT_EQ(0, this->write_entry(oid, 0, 0, 2)); + ASSERT_EQ(0, this->write_entry(oid, 0, 0, 4)); + ASSERT_EQ(0, this->write_entry(oid, 0, 1, 0)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 4, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 4, &entries)); Entries expected_entries = { - create_entry(0, 0), - create_entry(0, 1), - create_entry(0, 2), - create_entry(1, 0)}; + this->create_entry(0, 0), + this->create_entry(0, 1), + this->create_entry(0, 2), + this->create_entry(1, 0)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, PrefetchStaleEntries) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchStaleEntries) { + std::string oid = this->get_temp_oid(); journal::JournalPlayer::ObjectPositions positions = { cls::journal::ObjectPosition(0, 1, 0) }; cls::journal::ObjectSetPosition commit_position(positions); - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 1, 0, 1)); - ASSERT_EQ(0, write_entry(oid, 1, 0, 3)); - ASSERT_EQ(0, write_entry(oid, 0, 1, 0)); - ASSERT_EQ(0, write_entry(oid, 1, 1, 1)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 3)); + ASSERT_EQ(0, this->write_entry(oid, 0, 1, 0)); + ASSERT_EQ(0, this->write_entry(oid, 1, 1, 1)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 1, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 1, &entries)); Entries expected_entries = { - create_entry(1, 1)}; + this->create_entry(1, 1)}; ASSERT_EQ(expected_entries, entries); - ASSERT_TRUE(wait_for_complete(player)); - ASSERT_EQ(0, m_replay_hander.complete_result); + ASSERT_TRUE(this->wait_for_complete(player)); + ASSERT_EQ(0, this->m_replay_hander.complete_result); } -TEST_F(TestJournalPlayer, PrefetchUnexpectedTag) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchUnexpectedTag) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 234, 120)); - ASSERT_EQ(0, write_entry(oid, 1, 235, 121)); - ASSERT_EQ(0, write_entry(oid, 0, 234, 124)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 120)); + ASSERT_EQ(0, this->write_entry(oid, 1, 235, 121)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 1, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 1, &entries)); journal::Entry entry; uint64_t commit_tid; ASSERT_FALSE(player->try_pop_front(&entry, &commit_tid)); - ASSERT_TRUE(wait_for_complete(player)); - ASSERT_EQ(0, m_replay_hander.complete_result); + ASSERT_TRUE(this->wait_for_complete(player)); + ASSERT_EQ(0, this->m_replay_hander.complete_result); } -TEST_F(TestJournalPlayer, PrefetchAndWatch) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchAndWatch) { + std::string oid = this->get_temp_oid(); journal::JournalPlayer::ObjectPositions positions; positions = { cls::journal::ObjectPosition(0, 234, 122)}; cls::journal::ObjectSetPosition commit_position(positions); - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 234, 122)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122)); player->prefetch_and_watch(0.25); Entries entries; - ASSERT_EQ(0, write_entry(oid, 1, 234, 123)); - ASSERT_TRUE(wait_for_entries(player, 1, &entries)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123)); + ASSERT_TRUE(this->wait_for_entries(player, 1, &entries)); Entries expected_entries; - expected_entries = {create_entry(234, 123)}; + expected_entries = {this->create_entry(234, 123)}; ASSERT_EQ(expected_entries, entries); - ASSERT_EQ(0, write_entry(oid, 0, 234, 124)); - ASSERT_TRUE(wait_for_entries(player, 1, &entries)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124)); + ASSERT_TRUE(this->wait_for_entries(player, 1, &entries)); - expected_entries = {create_entry(234, 124)}; + expected_entries = {this->create_entry(234, 124)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, PrefetchSkippedObject) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefetchSkippedObject) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid, 14, 3)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid, 14, 3)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); ASSERT_EQ(0, metadata->set_active_set(2)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 234, 122)); - ASSERT_EQ(0, write_entry(oid, 1, 234, 123)); - ASSERT_EQ(0, write_entry(oid, 5, 234, 124)); - ASSERT_EQ(0, write_entry(oid, 6, 234, 125)); - ASSERT_EQ(0, write_entry(oid, 7, 234, 126)); + ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122)); + ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123)); + ASSERT_EQ(0, this->write_entry(oid, 5, 234, 124)); + ASSERT_EQ(0, this->write_entry(oid, 6, 234, 125)); + ASSERT_EQ(0, this->write_entry(oid, 7, 234, 126)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 5, &entries)); - ASSERT_TRUE(wait_for_complete(player)); + ASSERT_TRUE(this->wait_for_entries(player, 5, &entries)); + ASSERT_TRUE(this->wait_for_complete(player)); Entries expected_entries; expected_entries = { - create_entry(234, 122), - create_entry(234, 123), - create_entry(234, 124), - create_entry(234, 125), - create_entry(234, 126)}; + this->create_entry(234, 122), + this->create_entry(234, 123), + this->create_entry(234, 124), + this->create_entry(234, 125), + this->create_entry(234, 126)}; ASSERT_EQ(expected_entries, entries); uint64_t last_tid; @@ -606,8 +626,8 @@ TEST_F(TestJournalPlayer, PrefetchSkippedObject) { ASSERT_EQ(126U, last_tid); } -TEST_F(TestJournalPlayer, ImbalancedJournal) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, ImbalancedJournal) { + std::string oid = this->get_temp_oid(); journal::JournalPlayer::ObjectPositions positions = { cls::journal::ObjectPosition(9, 300, 1), @@ -616,43 +636,43 @@ TEST_F(TestJournalPlayer, ImbalancedJournal) { cls::journal::ObjectPosition(11, 200, 4331) }; cls::journal::ObjectSetPosition commit_position(positions); - ASSERT_EQ(0, create(oid, 14, 4)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid, 14, 4)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); ASSERT_EQ(0, metadata->set_active_set(2)); metadata->set_minimum_set(2); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 8, 300, 0)); - ASSERT_EQ(0, write_entry(oid, 8, 301, 0)); - ASSERT_EQ(0, write_entry(oid, 9, 300, 1)); - ASSERT_EQ(0, write_entry(oid, 9, 301, 1)); - ASSERT_EQ(0, write_entry(oid, 10, 200, 4334)); - ASSERT_EQ(0, write_entry(oid, 10, 301, 2)); - ASSERT_EQ(0, write_entry(oid, 11, 200, 4331)); - ASSERT_EQ(0, write_entry(oid, 11, 301, 3)); + ASSERT_EQ(0, this->write_entry(oid, 8, 300, 0)); + ASSERT_EQ(0, this->write_entry(oid, 8, 301, 0)); + ASSERT_EQ(0, this->write_entry(oid, 9, 300, 1)); + ASSERT_EQ(0, this->write_entry(oid, 9, 301, 1)); + ASSERT_EQ(0, this->write_entry(oid, 10, 200, 4334)); + ASSERT_EQ(0, this->write_entry(oid, 10, 301, 2)); + ASSERT_EQ(0, this->write_entry(oid, 11, 200, 4331)); + ASSERT_EQ(0, this->write_entry(oid, 11, 301, 3)); player->prefetch(); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 4, &entries)); - ASSERT_TRUE(wait_for_complete(player)); + ASSERT_TRUE(this->wait_for_entries(player, 4, &entries)); + ASSERT_TRUE(this->wait_for_complete(player)); Entries expected_entries; expected_entries = { - create_entry(301, 0), - create_entry(301, 1), - create_entry(301, 2), - create_entry(301, 3)}; + this->create_entry(301, 0), + this->create_entry(301, 1), + this->create_entry(301, 2), + this->create_entry(301, 3)}; ASSERT_EQ(expected_entries, entries); uint64_t last_tid; @@ -660,124 +680,124 @@ TEST_F(TestJournalPlayer, ImbalancedJournal) { ASSERT_EQ(3U, last_tid); } -TEST_F(TestJournalPlayer, LiveReplayLaggyAppend) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, LiveReplayLaggyAppend) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 0, 0)); - ASSERT_EQ(0, write_entry(oid, 1, 0, 1)); - ASSERT_EQ(0, write_entry(oid, 0, 0, 2)); - ASSERT_EQ(0, write_entry(oid, 0, 0, 4)); - ASSERT_EQ(0, write_entry(oid, 3, 0, 5)); // laggy entry 0/3 in object 1 + ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1)); + ASSERT_EQ(0, this->write_entry(oid, 0, 0, 2)); + ASSERT_EQ(0, this->write_entry(oid, 0, 0, 4)); + ASSERT_EQ(0, this->write_entry(oid, 3, 0, 5)); // laggy entry 0/3 in object 1 player->prefetch_and_watch(0.25); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 3, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 3, &entries)); Entries expected_entries = { - create_entry(0, 0), - create_entry(0, 1), - create_entry(0, 2)}; + this->create_entry(0, 0), + this->create_entry(0, 1), + this->create_entry(0, 2)}; ASSERT_EQ(expected_entries, entries); journal::Entry entry; uint64_t commit_tid; ASSERT_FALSE(player->try_pop_front(&entry, &commit_tid)); - ASSERT_EQ(0, write_entry(oid, 1, 0, 3)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 3)); ASSERT_EQ(0, metadata->set_active_set(1)); - ASSERT_TRUE(wait_for_entries(player, 3, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 3, &entries)); expected_entries = { - create_entry(0, 3), - create_entry(0, 4), - create_entry(0, 5)}; + this->create_entry(0, 3), + this->create_entry(0, 4), + this->create_entry(0, 5)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, LiveReplayMissingSequence) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, LiveReplayMissingSequence) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid, 14, 4)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid, 14, 4)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 0, 2, 852)); - ASSERT_EQ(0, write_entry(oid, 0, 2, 856)); - ASSERT_EQ(0, write_entry(oid, 0, 2, 860)); - ASSERT_EQ(0, write_entry(oid, 1, 2, 853)); - ASSERT_EQ(0, write_entry(oid, 1, 2, 857)); - ASSERT_EQ(0, write_entry(oid, 2, 2, 854)); - ASSERT_EQ(0, write_entry(oid, 0, 2, 856)); + ASSERT_EQ(0, this->write_entry(oid, 0, 2, 852)); + ASSERT_EQ(0, this->write_entry(oid, 0, 2, 856)); + ASSERT_EQ(0, this->write_entry(oid, 0, 2, 860)); + ASSERT_EQ(0, this->write_entry(oid, 1, 2, 853)); + ASSERT_EQ(0, this->write_entry(oid, 1, 2, 857)); + ASSERT_EQ(0, this->write_entry(oid, 2, 2, 854)); + ASSERT_EQ(0, this->write_entry(oid, 0, 2, 856)); player->prefetch_and_watch(0.25); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 3, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 3, &entries)); Entries expected_entries = { - create_entry(2, 852), - create_entry(2, 853), - create_entry(2, 854)}; + this->create_entry(2, 852), + this->create_entry(2, 853), + this->create_entry(2, 854)}; ASSERT_EQ(expected_entries, entries); journal::Entry entry; uint64_t commit_tid; ASSERT_FALSE(player->try_pop_front(&entry, &commit_tid)); - ASSERT_EQ(0, write_entry(oid, 3, 3, 3)); - ASSERT_EQ(0, write_entry(oid, 2, 3, 2)); - ASSERT_EQ(0, write_entry(oid, 1, 3, 1)); - ASSERT_EQ(0, write_entry(oid, 0, 3, 0)); - ASSERT_TRUE(wait_for_entries(player, 4, &entries)); + ASSERT_EQ(0, this->write_entry(oid, 3, 3, 3)); + ASSERT_EQ(0, this->write_entry(oid, 2, 3, 2)); + ASSERT_EQ(0, this->write_entry(oid, 1, 3, 1)); + ASSERT_EQ(0, this->write_entry(oid, 0, 3, 0)); + ASSERT_TRUE(this->wait_for_entries(player, 4, &entries)); expected_entries = { - create_entry(3, 0), - create_entry(3, 1), - create_entry(3, 2), - create_entry(3, 3)}; + this->create_entry(3, 0), + this->create_entry(3, 1), + this->create_entry(3, 2), + this->create_entry(3, 3)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, LiveReplayLargeMissingSequence) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, LiveReplayLargeMissingSequence) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); @@ -785,35 +805,35 @@ TEST_F(TestJournalPlayer, LiveReplayLargeMissingSequence) { }; ASSERT_EQ(0, metadata->set_active_set(2)); - ASSERT_EQ(0, write_entry(oid, 0, 0, 0)); - ASSERT_EQ(0, write_entry(oid, 1, 0, 1)); - ASSERT_EQ(0, write_entry(oid, 3, 0, 3)); - ASSERT_EQ(0, write_entry(oid, 4, 1, 0)); + ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1)); + ASSERT_EQ(0, this->write_entry(oid, 3, 0, 3)); + ASSERT_EQ(0, this->write_entry(oid, 4, 1, 0)); player->prefetch_and_watch(0.25); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 3, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 3, &entries)); Entries expected_entries = { - create_entry(0, 0), - create_entry(0, 1), - create_entry(1, 0)}; + this->create_entry(0, 0), + this->create_entry(0, 1), + this->create_entry(1, 0)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, LiveReplayBlockedNewTag) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, LiveReplayBlockedNewTag) { + std::string oid = this->get_temp_oid(); cls::journal::ObjectSetPosition commit_position; - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); @@ -826,19 +846,19 @@ TEST_F(TestJournalPlayer, LiveReplayBlockedNewTag) { ASSERT_EQ(0, ctx1.wait()); ASSERT_EQ(0, metadata->set_active_set(0)); - ASSERT_EQ(0, write_entry(oid, 0, tag1.tid, 0)); - ASSERT_EQ(0, write_entry(oid, 1, tag1.tid, 1)); - ASSERT_EQ(0, write_entry(oid, 0, tag1.tid, 2)); - ASSERT_EQ(0, write_entry(oid, 0, tag1.tid, 4)); + ASSERT_EQ(0, this->write_entry(oid, 0, tag1.tid, 0)); + ASSERT_EQ(0, this->write_entry(oid, 1, tag1.tid, 1)); + ASSERT_EQ(0, this->write_entry(oid, 0, tag1.tid, 2)); + ASSERT_EQ(0, this->write_entry(oid, 0, tag1.tid, 4)); player->prefetch_and_watch(0.25); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 3, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 3, &entries)); Entries expected_entries = { - create_entry(tag1.tid, 0), - create_entry(tag1.tid, 1), - create_entry(tag1.tid, 2)}; + this->create_entry(tag1.tid, 0), + this->create_entry(tag1.tid, 1), + this->create_entry(tag1.tid, 2)}; ASSERT_EQ(expected_entries, entries); journal::Entry entry; @@ -850,65 +870,65 @@ TEST_F(TestJournalPlayer, LiveReplayBlockedNewTag) { metadata->allocate_tag(tag1.tag_class, {}, &tag2, &ctx2); ASSERT_EQ(0, ctx2.wait()); - ASSERT_EQ(0, write_entry(oid, 0, tag2.tid, 0)); - ASSERT_TRUE(wait_for_entries(player, 1, &entries)); + ASSERT_EQ(0, this->write_entry(oid, 0, tag2.tid, 0)); + ASSERT_TRUE(this->wait_for_entries(player, 1, &entries)); expected_entries = { - create_entry(tag2.tid, 0)}; + this->create_entry(tag2.tid, 0)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, LiveReplayStaleEntries) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, LiveReplayStaleEntries) { + std::string oid = this->get_temp_oid(); journal::JournalPlayer::ObjectPositions positions = { cls::journal::ObjectPosition(0, 1, 0) }; cls::journal::ObjectSetPosition commit_position(positions); - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); ASSERT_EQ(0, unwatch_ctx.wait()); }; - ASSERT_EQ(0, write_entry(oid, 1, 0, 1)); - ASSERT_EQ(0, write_entry(oid, 1, 0, 3)); - ASSERT_EQ(0, write_entry(oid, 0, 1, 0)); - ASSERT_EQ(0, write_entry(oid, 1, 1, 1)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 3)); + ASSERT_EQ(0, this->write_entry(oid, 0, 1, 0)); + ASSERT_EQ(0, this->write_entry(oid, 1, 1, 1)); player->prefetch_and_watch(0.25); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 1, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 1, &entries)); Entries expected_entries = { - create_entry(1, 1)}; + this->create_entry(1, 1)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, LiveReplayRefetchRemoveEmpty) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, LiveReplayRefetchRemoveEmpty) { + std::string oid = this->get_temp_oid(); journal::JournalPlayer::ObjectPositions positions = { cls::journal::ObjectPosition(1, 0, 1), cls::journal::ObjectPosition(0, 0, 0)}; cls::journal::ObjectSetPosition commit_position(positions); - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, commit_position)); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, commit_position)); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); @@ -916,41 +936,41 @@ TEST_F(TestJournalPlayer, LiveReplayRefetchRemoveEmpty) { }; ASSERT_EQ(0, metadata->set_active_set(1)); - ASSERT_EQ(0, write_entry(oid, 0, 0, 0)); - ASSERT_EQ(0, write_entry(oid, 1, 0, 1)); - ASSERT_EQ(0, write_entry(oid, 3, 0, 3)); - ASSERT_EQ(0, write_entry(oid, 2, 1, 0)); + ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0)); + ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1)); + ASSERT_EQ(0, this->write_entry(oid, 3, 0, 3)); + ASSERT_EQ(0, this->write_entry(oid, 2, 1, 0)); player->prefetch_and_watch(0.25); Entries entries; - ASSERT_TRUE(wait_for_entries(player, 1, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 1, &entries)); Entries expected_entries = { - create_entry(1, 0)}; + this->create_entry(1, 0)}; ASSERT_EQ(expected_entries, entries); // should remove player for offset 3 after refetching ASSERT_EQ(0, metadata->set_active_set(3)); - ASSERT_EQ(0, write_entry(oid, 7, 1, 1)); + ASSERT_EQ(0, this->write_entry(oid, 7, 1, 1)); - ASSERT_TRUE(wait_for_entries(player, 1, &entries)); + ASSERT_TRUE(this->wait_for_entries(player, 1, &entries)); expected_entries = { - create_entry(1, 1)}; + this->create_entry(1, 1)}; ASSERT_EQ(expected_entries, entries); } -TEST_F(TestJournalPlayer, PrefechShutDown) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, PrefechShutDown) { + std::string oid = this->get_temp_oid(); - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, {})); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, {})); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); @@ -959,17 +979,17 @@ TEST_F(TestJournalPlayer, PrefechShutDown) { player->prefetch(); } -TEST_F(TestJournalPlayer, LiveReplayShutDown) { - std::string oid = get_temp_oid(); +TYPED_TEST(TestJournalPlayer, LiveReplayShutDown) { + std::string oid = this->get_temp_oid(); - ASSERT_EQ(0, create(oid)); - ASSERT_EQ(0, client_register(oid)); - ASSERT_EQ(0, client_commit(oid, {})); + ASSERT_EQ(0, this->create(oid)); + ASSERT_EQ(0, this->client_register(oid)); + ASSERT_EQ(0, this->client_commit(oid, {})); - journal::JournalMetadataPtr metadata = create_metadata(oid); - ASSERT_EQ(0, init_metadata(metadata)); + journal::JournalMetadataPtr metadata = this->create_metadata(oid); + ASSERT_EQ(0, this->init_metadata(metadata)); - journal::JournalPlayer *player = create_player(oid, metadata); + journal::JournalPlayer *player = this->create_player(oid, metadata); BOOST_SCOPE_EXIT_ALL( (player) ) { C_SaferCond unwatch_ctx; player->shut_down(&unwatch_ctx); diff --git a/src/test/journal/test_ObjectPlayer.cc b/src/test/journal/test_ObjectPlayer.cc index ed4c0b667e68d..880128ee8e19a 100644 --- a/src/test/journal/test_ObjectPlayer.cc +++ b/src/test/journal/test_ObjectPlayer.cc @@ -145,7 +145,7 @@ TYPED_TEST(TestObjectPlayer, FetchEmpty) { journal::ObjectPlayerPtr object = this->create_object(oid, 14); - ASSERT_EQ(-ENOENT, this->fetch(object)); + ASSERT_EQ(0, this->fetch(object)); ASSERT_TRUE(object->empty()); } From 11475f4fe740cccdfea459ebeabdca8cb94dc911 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Wed, 20 Jul 2016 09:15:26 -0400 Subject: [PATCH 06/16] journal: improve debug log messages rbd-mirror debugging involved potentially thousands of journals concurrently running. The instance address will correlate log messages between journals. Signed-off-by: Jason Dillaman --- src/journal/Entry.cc | 2 +- src/journal/JournalMetadata.cc | 2 +- src/journal/JournalPlayer.cc | 2 +- src/journal/JournalRecorder.cc | 2 +- src/journal/JournalTrimmer.cc | 2 +- src/journal/Journaler.cc | 2 +- src/journal/ObjectPlayer.cc | 2 +- src/journal/ObjectRecorder.cc | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/journal/Entry.cc b/src/journal/Entry.cc index f88dea86358ba..257fa583af5e4 100644 --- a/src/journal/Entry.cc +++ b/src/journal/Entry.cc @@ -9,7 +9,7 @@ #define dout_subsys ceph_subsys_journaler #undef dout_prefix -#define dout_prefix *_dout << "Entry: " +#define dout_prefix *_dout << "Entry: " << this << " " namespace journal { diff --git a/src/journal/JournalMetadata.cc b/src/journal/JournalMetadata.cc index 63c67f21cdb1a..de46bd7ff0af1 100644 --- a/src/journal/JournalMetadata.cc +++ b/src/journal/JournalMetadata.cc @@ -11,7 +11,7 @@ #define dout_subsys ceph_subsys_journaler #undef dout_prefix -#define dout_prefix *_dout << "JournalMetadata: " +#define dout_prefix *_dout << "JournalMetadata: " << this << " " namespace journal { diff --git a/src/journal/JournalPlayer.cc b/src/journal/JournalPlayer.cc index 30d6cdd36cfa0..750f936b20c71 100644 --- a/src/journal/JournalPlayer.cc +++ b/src/journal/JournalPlayer.cc @@ -8,7 +8,7 @@ #define dout_subsys ceph_subsys_journaler #undef dout_prefix -#define dout_prefix *_dout << "JournalPlayer: " +#define dout_prefix *_dout << "JournalPlayer: " << this << " " namespace journal { diff --git a/src/journal/JournalRecorder.cc b/src/journal/JournalRecorder.cc index b4da4ff006737..4cbe7391ee94d 100644 --- a/src/journal/JournalRecorder.cc +++ b/src/journal/JournalRecorder.cc @@ -8,7 +8,7 @@ #define dout_subsys ceph_subsys_journaler #undef dout_prefix -#define dout_prefix *_dout << "JournalRecorder: " +#define dout_prefix *_dout << "JournalRecorder: " << this << " " namespace journal { diff --git a/src/journal/JournalTrimmer.cc b/src/journal/JournalTrimmer.cc index a4a47fab76c5c..aef16c2fb0acb 100644 --- a/src/journal/JournalTrimmer.cc +++ b/src/journal/JournalTrimmer.cc @@ -9,7 +9,7 @@ #define dout_subsys ceph_subsys_journaler #undef dout_prefix -#define dout_prefix *_dout << "JournalTrimmer: " +#define dout_prefix *_dout << "JournalTrimmer: " << this << " " namespace journal { diff --git a/src/journal/Journaler.cc b/src/journal/Journaler.cc index 27cc33845c01f..c08a11b71de49 100644 --- a/src/journal/Journaler.cc +++ b/src/journal/Journaler.cc @@ -19,7 +19,7 @@ #define dout_subsys ceph_subsys_journaler #undef dout_prefix -#define dout_prefix *_dout << "Journaler: " +#define dout_prefix *_dout << "Journaler: " << this << " " namespace journal { diff --git a/src/journal/ObjectPlayer.cc b/src/journal/ObjectPlayer.cc index 31939e9bb167f..43adb5da5cd63 100644 --- a/src/journal/ObjectPlayer.cc +++ b/src/journal/ObjectPlayer.cc @@ -8,7 +8,7 @@ #define dout_subsys ceph_subsys_journaler #undef dout_prefix -#define dout_prefix *_dout << "ObjectPlayer: " +#define dout_prefix *_dout << "ObjectPlayer: " << this << " " namespace journal { diff --git a/src/journal/ObjectRecorder.cc b/src/journal/ObjectRecorder.cc index 5972d899b1355..0cf2fd1e8ea05 100644 --- a/src/journal/ObjectRecorder.cc +++ b/src/journal/ObjectRecorder.cc @@ -10,7 +10,7 @@ #define dout_subsys ceph_subsys_journaler #undef dout_prefix -#define dout_prefix *_dout << "ObjectRecorder: " +#define dout_prefix *_dout << "ObjectRecorder: " << this << " " using namespace cls::journal; From 2c65471de4b0f54b8ed722f5deaf51ba62632e37 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Wed, 20 Jul 2016 10:04:21 -0400 Subject: [PATCH 07/16] journal: possible deadlock during flush of journal entries If a future flush is requested at the exact same moment that an overflow is detected, the two threads will deadlock since locks are not taken in a consistent order. Signed-off-by: Jason Dillaman --- src/journal/FutureImpl.cc | 43 +++++++++++++++++++---------- src/journal/FutureImpl.h | 4 +++ src/test/journal/test_FutureImpl.cc | 7 +++-- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/journal/FutureImpl.cc b/src/journal/FutureImpl.cc index aebfe12e31002..1597c7398dc75 100644 --- a/src/journal/FutureImpl.cc +++ b/src/journal/FutureImpl.cc @@ -10,7 +10,7 @@ FutureImpl::FutureImpl(uint64_t tag_tid, uint64_t entry_tid, uint64_t commit_tid) : RefCountedObject(NULL, 0), m_tag_tid(tag_tid), m_entry_tid(entry_tid), m_commit_tid(commit_tid), - m_lock(utils::unique_lock_name("FutureImpl::m_lock", this)), m_safe(false), + m_lock("FutureImpl::m_lock", false, false), m_safe(false), m_consistent(false), m_return_value(0), m_flush_state(FLUSH_STATE_NONE), m_consistent_ack(this) { } @@ -27,36 +27,51 @@ void FutureImpl::init(const FutureImplPtr &prev_future) { } void FutureImpl::flush(Context *on_safe) { + bool complete; - FlushHandlerPtr flush_handler; + FlushHandlers flush_handlers; + FutureImplPtr prev_future; { Mutex::Locker locker(m_lock); complete = (m_safe && m_consistent); if (!complete) { - if (on_safe != NULL) { + if (on_safe != nullptr) { m_contexts.push_back(on_safe); } - if (m_flush_state == FLUSH_STATE_NONE) { - m_flush_state = FLUSH_STATE_REQUESTED; - flush_handler = m_flush_handler; - - // walk the chain backwards up to futures - if (m_prev_future) { - m_prev_future->flush(); - } - } + prev_future = prepare_flush(&flush_handlers); } } + // instruct prior futures to flush as well + while (prev_future) { + Mutex::Locker locker(prev_future->m_lock); + prev_future = prev_future->prepare_flush(&flush_handlers); + } + if (complete && on_safe != NULL) { on_safe->complete(m_return_value); - } else if (flush_handler) { + } else if (!flush_handlers.empty()) { // attached to journal object -- instruct it to flush all entries through // this one. possible to become detached while lock is released, so flush // will be re-requested by the object if it doesn't own the future - flush_handler->flush(this); + for (auto &pair : flush_handlers) { + pair.first->flush(pair.second); + } + } +} + +FutureImplPtr FutureImpl::prepare_flush(FlushHandlers *flush_handlers) { + assert(m_lock.is_locked()); + + if (m_flush_state == FLUSH_STATE_NONE) { + m_flush_state = FLUSH_STATE_REQUESTED; + + if (m_flush_handler && flush_handlers->count(m_flush_handler) == 0) { + flush_handlers->insert({m_flush_handler, this}); + } } + return m_prev_future; } void FutureImpl::wait(Context *on_safe) { diff --git a/src/journal/FutureImpl.h b/src/journal/FutureImpl.h index 0d5e86fbddf67..00542729beb5d 100644 --- a/src/journal/FutureImpl.h +++ b/src/journal/FutureImpl.h @@ -9,6 +9,7 @@ #include "common/RefCountedObj.h" #include "journal/Future.h" #include +#include #include #include #include "include/assert.h" @@ -76,6 +77,7 @@ class FutureImpl : public RefCountedObject, boost::noncopyable { private: friend std::ostream &operator<<(std::ostream &, const FutureImpl &); + typedef std::map FlushHandlers; typedef std::list Contexts; enum FlushState { @@ -110,6 +112,8 @@ class FutureImpl : public RefCountedObject, boost::noncopyable { C_ConsistentAck m_consistent_ack; Contexts m_contexts; + FutureImplPtr prepare_flush(FlushHandlers *flush_handlers); + void consistent(int r); void finish_unlock(); }; diff --git a/src/test/journal/test_FutureImpl.cc b/src/test/journal/test_FutureImpl.cc index eb5f80696bd62..af8ca76564242 100644 --- a/src/test/journal/test_FutureImpl.cc +++ b/src/test/journal/test_FutureImpl.cc @@ -154,14 +154,17 @@ TEST_F(TestFutureImpl, FlushChain) { future1); journal::FutureImplPtr future3 = create_future(235, 1, 458, future2); + + FlushHandler flush_handler; ASSERT_FALSE(future1->attach(&m_flush_handler)); - ASSERT_FALSE(future2->attach(&m_flush_handler)); + ASSERT_FALSE(future2->attach(&flush_handler)); ASSERT_FALSE(future3->attach(&m_flush_handler)); C_SaferCond cond; future3->flush(&cond); - ASSERT_EQ(3U, m_flush_handler.flushes); + ASSERT_EQ(1U, m_flush_handler.flushes); + ASSERT_EQ(1U, flush_handler.flushes); future3->safe(0); ASSERT_FALSE(future3->is_complete()); From 08a8ee98c03b5dfb30341c8d209f0c231b2c5d27 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Wed, 20 Jul 2016 16:17:41 -0400 Subject: [PATCH 08/16] journal: optimize speed of live replay journal pruning When streaming playback, avoid the unnecessary watch delay when one or more entries have been pruned. Signed-off-by: Jason Dillaman --- src/journal/JournalPlayer.cc | 28 +++++++++++++++++++++------ src/journal/ObjectPlayer.cc | 2 +- src/journal/ObjectPlayer.h | 18 +++++++++++------ src/test/journal/test_ObjectPlayer.cc | 3 ++- 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/src/journal/JournalPlayer.cc b/src/journal/JournalPlayer.cc index 750f936b20c71..09f2d2ca3f324 100644 --- a/src/journal/JournalPlayer.cc +++ b/src/journal/JournalPlayer.cc @@ -300,7 +300,7 @@ int JournalPlayer::process_prefetch(uint64_t object_number) { if (object_player->empty() && object_player->refetch_required()) { ldout(m_cct, 10) << "refetching potentially partially decoded object" << dendl; - object_player->clear_refetch_required(); + object_player->set_refetch_state(ObjectPlayer::REFETCH_STATE_NONE); fetch(object_player); } else if (!remove_empty_object_player(object_player)) { ldout(m_cct, 10) << "prefetch of object complete" << dendl; @@ -482,6 +482,7 @@ void JournalPlayer::prune_tag(uint64_t tag_tid) { m_prune_tag_tid = tag_tid; } + bool pruned = false; for (auto &player_pair : m_object_players) { ObjectPlayerPtr object_player(player_pair.second); ldout(m_cct, 15) << __func__ << ": checking " << object_player->get_oid() @@ -492,12 +493,25 @@ void JournalPlayer::prune_tag(uint64_t tag_tid) { if (entry.get_tag_tid() == tag_tid) { ldout(m_cct, 20) << __func__ << ": pruned " << entry << dendl; object_player->pop_front(); + pruned = true; } else { break; } } + } + + // avoid watch delay when pruning stale tags from journal objects + if (pruned) { + ldout(m_cct, 15) << __func__ << ": reseting refetch state to immediate" + << dendl; + for (auto &player_pair : m_object_players) { + ObjectPlayerPtr object_player(player_pair.second); + object_player->set_refetch_state(ObjectPlayer::REFETCH_STATE_IMMEDIATE); + } + } - // trim empty player to prefetch the next available object + // trim empty player to prefetch the next available object + for (auto &player_pair : m_object_players) { remove_empty_object_player(player_pair.second); } } @@ -565,7 +579,7 @@ bool JournalPlayer::remove_empty_object_player(const ObjectPlayerPtr &player) { << "require refetch" << dendl; m_active_set = active_set; for (auto &pair : m_object_players) { - pair.second->set_refetch_required(); + pair.second->set_refetch_state(ObjectPlayer::REFETCH_STATE_IMMEDIATE); } return false; } @@ -641,7 +655,7 @@ void JournalPlayer::refetch(bool immediate) { ObjectPlayerPtr object_player = get_object_player(); if (object_player->refetch_required()) { - object_player->clear_refetch_required(); + object_player->set_refetch_state(ObjectPlayer::REFETCH_STATE_NONE); fetch(object_player); return; } @@ -684,11 +698,13 @@ void JournalPlayer::schedule_watch(bool immediate) { uint64_t active_set = m_journal_metadata->get_active_set(); uint64_t object_set = object_player->get_object_number() / splay_width; if (immediate || + (object_player->get_refetch_state() == + ObjectPlayer::REFETCH_STATE_IMMEDIATE) || (object_set < active_set && object_player->refetch_required())) { - ldout(m_cct, 20) << __func__ << ": refetching " + ldout(m_cct, 20) << __func__ << ": immediately refetching " << object_player->get_oid() << dendl; - object_player->clear_refetch_required(); + object_player->set_refetch_state(ObjectPlayer::REFETCH_STATE_NONE); watch_interval = 0; } } diff --git a/src/journal/ObjectPlayer.cc b/src/journal/ObjectPlayer.cc index 43adb5da5cd63..92dd702615bcd 100644 --- a/src/journal/ObjectPlayer.cc +++ b/src/journal/ObjectPlayer.cc @@ -122,7 +122,7 @@ int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl, assert(m_fetch_in_progress); m_read_off += bl.length(); m_read_bl.append(bl); - m_refetch_required = true; + m_refetch_state = REFETCH_STATE_REQUIRED; bool full_fetch = (m_max_fetch_bytes == 2U << m_order); bool partial_entry = false; diff --git a/src/journal/ObjectPlayer.h b/src/journal/ObjectPlayer.h index a9d2d9e985ec4..cff33dc3bcd9c 100644 --- a/src/journal/ObjectPlayer.h +++ b/src/journal/ObjectPlayer.h @@ -30,6 +30,12 @@ class ObjectPlayer : public RefCountedObject { typedef std::list Entries; typedef interval_set InvalidRanges; + enum RefetchState { + REFETCH_STATE_NONE, + REFETCH_STATE_REQUIRED, + REFETCH_STATE_IMMEDIATE + }; + ObjectPlayer(librados::IoCtx &ioctx, const std::string &object_oid_prefix, uint64_t object_num, SafeTimer &timer, Mutex &timer_lock, uint8_t order, uint64_t max_fetch_bytes); @@ -63,13 +69,13 @@ class ObjectPlayer : public RefCountedObject { } inline bool refetch_required() const { - return m_refetch_required; + return (get_refetch_state() != REFETCH_STATE_NONE); } - inline void set_refetch_required() { - m_refetch_required = true; + inline RefetchState get_refetch_state() const { + return m_refetch_state; } - inline void clear_refetch_required() { - m_refetch_required = false; + inline void set_refetch_state(RefetchState refetch_state) { + m_refetch_state = refetch_state; } private: @@ -124,7 +130,7 @@ class ObjectPlayer : public RefCountedObject { Context *m_watch_ctx = nullptr; bool m_unwatched = false; - bool m_refetch_required = true; + RefetchState m_refetch_state = REFETCH_STATE_IMMEDIATE; int handle_fetch_complete(int r, const bufferlist &bl, bool *refetch); diff --git a/src/test/journal/test_ObjectPlayer.cc b/src/test/journal/test_ObjectPlayer.cc index 880128ee8e19a..90a3334a7aa20 100644 --- a/src/test/journal/test_ObjectPlayer.cc +++ b/src/test/journal/test_ObjectPlayer.cc @@ -26,7 +26,8 @@ class TestObjectPlayer : public RadosTestFixture { int fetch(journal::ObjectPlayerPtr object_player) { while (true) { C_SaferCond ctx; - object_player->clear_refetch_required(); + object_player->set_refetch_state( + journal::ObjectPlayer::REFETCH_STATE_NONE); object_player->fetch(&ctx); int r = ctx.wait(); if (r < 0 || !object_player->refetch_required()) { From 47e0fbf231e52d00069c97b72c57c3158445bcf0 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Tue, 19 Jul 2016 00:42:16 -0400 Subject: [PATCH 09/16] librbd: wait for journal commit op event to be safely recorded Operation request op finish events should not be fire and forget. Instead, ensure the event is committed to the journal before completing the op. This will avoid several possible split-brain events during mirroring. Signed-off-by: Jason Dillaman --- src/librbd/AsyncRequest.h | 9 ++- src/librbd/Journal.cc | 9 +-- src/librbd/Journal.h | 12 ++-- src/librbd/operation/Request.cc | 64 ++++++++++++++++--- src/librbd/operation/Request.h | 26 ++++++-- src/librbd/operation/ResizeRequest.cc | 26 +++----- src/librbd/operation/SnapshotCreateRequest.cc | 30 +++------ src/librbd/operation/SnapshotCreateRequest.h | 1 - .../operation/SnapshotRollbackRequest.cc | 12 ++-- src/test/librbd/mock/MockJournal.h | 2 +- .../operation/test_mock_ResizeRequest.cc | 5 +- src/test/librbd/test_mock_fixture.cc | 3 +- 12 files changed, 123 insertions(+), 76 deletions(-) diff --git a/src/librbd/AsyncRequest.h b/src/librbd/AsyncRequest.h index a2a94735d70a9..f74368dc644f6 100644 --- a/src/librbd/AsyncRequest.h +++ b/src/librbd/AsyncRequest.h @@ -22,8 +22,7 @@ class AsyncRequest void complete(int r) { if (should_complete(r)) { r = filter_return_code(r); - finish(r); - delete this; + finish_and_destroy(r); } } @@ -50,6 +49,12 @@ class AsyncRequest return r; } + // NOTE: temporary until converted to new state machine format + virtual void finish_and_destroy(int r) { + finish(r); + delete this; + } + virtual void finish(int r) { finish_request(); m_on_finish->complete(r); diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc index dda73e38672d4..cf3e3fd9848d1 100644 --- a/src/librbd/Journal.cc +++ b/src/librbd/Journal.cc @@ -1008,7 +1008,7 @@ void Journal::append_op_event(uint64_t op_tid, } template -void Journal::commit_op_event(uint64_t op_tid, int r) { +void Journal::commit_op_event(uint64_t op_tid, int r, Context *on_safe) { CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << this << " " << __func__ << ": op_tid=" << op_tid << ", " << "r=" << r << dendl; @@ -1035,7 +1035,7 @@ void Journal::commit_op_event(uint64_t op_tid, int r) { op_finish_future.flush(create_async_context_callback( m_image_ctx, new C_OpEventSafe(this, op_tid, op_start_future, - op_finish_future))); + op_finish_future, on_safe))); } template @@ -1647,7 +1647,8 @@ void Journal::handle_io_event_safe(int r, uint64_t tid) { template void Journal::handle_op_event_safe(int r, uint64_t tid, const Future &op_start_future, - const Future &op_finish_future) { + const Future &op_finish_future, + Context *on_safe) { CephContext *cct = m_image_ctx.cct; ldout(cct, 20) << this << " " << __func__ << ": r=" << r << ", " << "tid=" << tid << dendl; @@ -1663,7 +1664,7 @@ void Journal::handle_op_event_safe(int r, uint64_t tid, m_journaler->committed(op_finish_future); // reduce the replay window after committing an op event - m_journaler->flush_commit_position(nullptr); + m_journaler->flush_commit_position(on_safe); } template diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h index d2b7ccb185f7a..496c8819f9636 100644 --- a/src/librbd/Journal.h +++ b/src/librbd/Journal.h @@ -143,7 +143,7 @@ class Journal { void append_op_event(uint64_t op_tid, journal::EventEntry &&event_entry, Context *on_safe); - void commit_op_event(uint64_t tid, int r); + void commit_op_event(uint64_t tid, int r, Context *on_safe); void replay_op_ready(uint64_t op_tid, Context *on_resume); void flush_event(uint64_t tid, Context *on_safe); @@ -221,15 +221,17 @@ class Journal { uint64_t tid; Future op_start_future; Future op_finish_future; + Context *on_safe; C_OpEventSafe(Journal *journal, uint64_t tid, const Future &op_start_future, - const Future &op_finish_future) + const Future &op_finish_future, Context *on_safe) : journal(journal), tid(tid), op_start_future(op_start_future), - op_finish_future(op_finish_future) { + op_finish_future(op_finish_future), on_safe(on_safe) { } virtual void finish(int r) { - journal->handle_op_event_safe(r, tid, op_start_future, op_finish_future); + journal->handle_op_event_safe(r, tid, op_start_future, op_finish_future, + on_safe); } }; @@ -348,7 +350,7 @@ class Journal { void handle_io_event_safe(int r, uint64_t tid); void handle_op_event_safe(int r, uint64_t tid, const Future &op_start_future, - const Future &op_finish_future); + const Future &op_finish_future, Context *on_safe); void stop_recording(); diff --git a/src/librbd/operation/Request.cc b/src/librbd/operation/Request.cc index e390b412ccb01..f1ad960d0c444 100644 --- a/src/librbd/operation/Request.cc +++ b/src/librbd/operation/Request.cc @@ -32,13 +32,40 @@ void Request::send() { } template -void Request::finish(int r) { - // automatically commit the event if we don't need to worry - // about affecting concurrent IO ops - if (r < 0 || !can_affect_io()) { - commit_op_event(r); +Context *Request::create_context_finisher(int r) { + // automatically commit the event if required (delete after commit) + if (m_appended_op_event && !m_committed_op_event && + commit_op_event(r)) { + return nullptr; } + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + return util::create_context_callback, &Request::finish>(this); +} + +template +void Request::finish_and_destroy(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; + + // automatically commit the event if required (delete after commit) + if (m_appended_op_event && !m_committed_op_event && + commit_op_event(r)) { + return; + } + + AsyncRequest::finish_and_destroy(r); +} + +template +void Request::finish(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; + assert(!m_appended_op_event || m_committed_op_event); AsyncRequest::finish(r); } @@ -59,12 +86,12 @@ bool Request::append_op_event() { } template -void Request::commit_op_event(int r) { +bool Request::commit_op_event(int r) { I &image_ctx = this->m_image_ctx; RWLock::RLocker snap_locker(image_ctx.snap_lock); if (!m_appended_op_event) { - return; + return false; } assert(m_op_tid != 0); @@ -78,8 +105,27 @@ void Request::commit_op_event(int r) { // ops will be canceled / completed before closing journal assert(image_ctx.journal->is_journal_ready()); - image_ctx.journal->commit_op_event(m_op_tid, r); + image_ctx.journal->commit_op_event(m_op_tid, r, + new C_CommitOpEvent(this, r)); + return true; + } + return false; +} + +template +void Request::handle_commit_op_event(int r, int original_ret_val) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to commit op event to journal: " << cpp_strerror(r) + << dendl; + } + if (original_ret_val < 0) { + r = original_ret_val; } + finish(r); } template @@ -106,7 +152,7 @@ void Request::append_op_event(Context *on_safe) { m_op_tid = image_ctx.journal->allocate_op_tid(); image_ctx.journal->append_op_event( m_op_tid, journal::EventEntry{create_event(m_op_tid)}, - new C_OpEventSafe(this, on_safe)); + new C_AppendOpEvent(this, on_safe)); } template diff --git a/src/librbd/operation/Request.h b/src/librbd/operation/Request.h index d7f0e3f99582f..6a09cb1d63921 100644 --- a/src/librbd/operation/Request.h +++ b/src/librbd/operation/Request.h @@ -53,19 +53,16 @@ class Request : public AsyncRequest { } bool append_op_event(); - void commit_op_event(int r); // NOTE: temporary until converted to new state machine format - Context *create_context_finisher() { - return util::create_context_callback< - Request, &Request::finish>(this); - } + Context *create_context_finisher(int r); + virtual void finish_and_destroy(int r) override; private: - struct C_OpEventSafe : public Context { + struct C_AppendOpEvent : public Context { Request *request; Context *on_safe; - C_OpEventSafe(Request *request, Context *on_safe) + C_AppendOpEvent(Request *request, Context *on_safe) : request(request), on_safe(on_safe) { } virtual void finish(int r) override { @@ -76,6 +73,18 @@ class Request : public AsyncRequest { } }; + struct C_CommitOpEvent : public Context { + Request *request; + int ret_val; + C_CommitOpEvent(Request *request, int ret_val) + : request(request), ret_val(ret_val) { + } + virtual void finish(int r) override { + request->handle_commit_op_event(r, ret_val); + delete request; + } + }; + uint64_t m_op_tid = 0; bool m_appended_op_event = false; bool m_committed_op_event = false; @@ -84,6 +93,9 @@ class Request : public AsyncRequest { void append_op_event(Context *on_safe); void handle_op_event_safe(int r); + bool commit_op_event(int r); + void handle_commit_op_event(int r, int original_ret_val); + }; } // namespace operation diff --git a/src/librbd/operation/ResizeRequest.cc b/src/librbd/operation/ResizeRequest.cc index 0aef072c8f9b8..6fb50ae6f15a7 100644 --- a/src/librbd/operation/ResizeRequest.cc +++ b/src/librbd/operation/ResizeRequest.cc @@ -105,7 +105,7 @@ Context *ResizeRequest::handle_pre_block_writes(int *result) { if (*result < 0) { lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; image_ctx.aio_work_queue->unblock_writes(); - return this->create_context_finisher(); + return this->create_context_finisher(*result); } return send_append_op_event(); @@ -141,7 +141,7 @@ Context *ResizeRequest::handle_append_op_event(int *result) { lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result) << dendl; image_ctx.aio_work_queue->unblock_writes(); - return this->create_context_finisher(); + return this->create_context_finisher(*result); } return send_grow_object_map(); @@ -169,10 +169,10 @@ Context *ResizeRequest::handle_trim_image(int *result) { if (*result == -ERESTART) { ldout(cct, 5) << "resize operation interrupted" << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } else if (*result < 0) { lderr(cct) << "failed to trim image: " << cpp_strerror(*result) << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } send_invalidate_cache(); @@ -202,7 +202,7 @@ Context *ResizeRequest::handle_invalidate_cache(int *result) { if (*result < 0) { lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result) << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } send_post_block_writes(); @@ -220,10 +220,7 @@ Context *ResizeRequest::send_grow_object_map() { image_ctx.aio_work_queue->unblock_writes(); if (m_original_size == m_new_size) { - if (!m_disable_journal) { - this->commit_op_event(0); - } - return this->create_context_finisher(); + return this->create_context_finisher(0); } else if (m_new_size < m_original_size) { send_trim_image(); return nullptr; @@ -276,7 +273,7 @@ Context *ResizeRequest::send_shrink_object_map() { image_ctx.owner_lock.put_read(); update_size_and_overlap(); - return this->create_context_finisher(); + return this->create_context_finisher(0); } CephContext *cct = image_ctx.cct; @@ -304,7 +301,7 @@ Context *ResizeRequest::handle_shrink_object_map(int *result) { update_size_and_overlap(); assert(*result == 0); - return this->create_context_finisher(); + return this->create_context_finisher(0); } template @@ -328,7 +325,7 @@ Context *ResizeRequest::handle_post_block_writes(int *result) { image_ctx.aio_work_queue->unblock_writes(); lderr(cct) << "failed to block writes prior to header update: " << cpp_strerror(*result) << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } send_update_header(); @@ -380,12 +377,9 @@ Context *ResizeRequest::handle_update_header(int *result) { lderr(cct) << "failed to update image header: " << cpp_strerror(*result) << dendl; image_ctx.aio_work_queue->unblock_writes(); - return this->create_context_finisher(); + return this->create_context_finisher(*result); } - if (!m_disable_journal) { - this->commit_op_event(0); - } return send_shrink_object_map(); } diff --git a/src/librbd/operation/SnapshotCreateRequest.cc b/src/librbd/operation/SnapshotCreateRequest.cc index 5e1a75069a15d..4f20ec4f977f1 100644 --- a/src/librbd/operation/SnapshotCreateRequest.cc +++ b/src/librbd/operation/SnapshotCreateRequest.cc @@ -115,7 +115,7 @@ Context *SnapshotCreateRequest::handle_suspend_aio(int *result) { if (*result < 0) { lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; image_ctx.aio_work_queue->unblock_writes(); - return this->create_context_finisher(); + return this->create_context_finisher(*result); } send_append_op_event(); @@ -146,7 +146,7 @@ Context *SnapshotCreateRequest::handle_append_op_event(int *result) { image_ctx.aio_work_queue->unblock_writes(); lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result) << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } send_allocate_snap_id(); @@ -175,10 +175,10 @@ Context *SnapshotCreateRequest::handle_allocate_snap_id(int *result) { if (*result < 0) { save_result(result); - finalize(*result); + image_ctx.aio_work_queue->unblock_writes(); lderr(cct) << "failed to allocate snapshot id: " << cpp_strerror(*result) << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } send_create_snap(); @@ -250,8 +250,8 @@ Context *SnapshotCreateRequest::send_create_object_map() { if (image_ctx.object_map == nullptr || m_skip_object_map) { image_ctx.snap_lock.put_read(); - finalize(0); - return this->create_context_finisher(); + image_ctx.aio_work_queue->unblock_writes(); + return this->create_context_finisher(0); } CephContext *cct = image_ctx.cct; @@ -276,8 +276,8 @@ Context *SnapshotCreateRequest::handle_create_object_map(int *result) { assert(*result == 0); - finalize(0); - return this->create_context_finisher(); + image_ctx.aio_work_queue->unblock_writes(); + return this->create_context_finisher(0); } template @@ -304,20 +304,8 @@ Context *SnapshotCreateRequest::handle_release_snap_id(int *result) { assert(m_ret_val < 0); *result = m_ret_val; - finalize(m_ret_val); - return this->create_context_finisher(); -} - -template -void SnapshotCreateRequest::finalize(int r) { - I &image_ctx = this->m_image_ctx; - CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << r << dendl; - - if (r == 0) { - this->commit_op_event(0); - } image_ctx.aio_work_queue->unblock_writes(); + return this->create_context_finisher(m_ret_val); } template diff --git a/src/librbd/operation/SnapshotCreateRequest.h b/src/librbd/operation/SnapshotCreateRequest.h index c93f685abf5ba..c239d601eaebb 100644 --- a/src/librbd/operation/SnapshotCreateRequest.h +++ b/src/librbd/operation/SnapshotCreateRequest.h @@ -105,7 +105,6 @@ class SnapshotCreateRequest : public Request { void send_release_snap_id(); Context *handle_release_snap_id(int *result); - void finalize(int r); void update_snap_context(); void save_result(int *result) { diff --git a/src/librbd/operation/SnapshotRollbackRequest.cc b/src/librbd/operation/SnapshotRollbackRequest.cc index 0ab5f34d330f7..8edf5b763164c 100644 --- a/src/librbd/operation/SnapshotRollbackRequest.cc +++ b/src/librbd/operation/SnapshotRollbackRequest.cc @@ -107,7 +107,7 @@ Context *SnapshotRollbackRequest::handle_block_writes(int *result) { if (*result < 0) { lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } send_resize_image(); @@ -150,7 +150,7 @@ Context *SnapshotRollbackRequest::handle_resize_image(int *result) { if (*result < 0) { lderr(cct) << "failed to resize image for rollback: " << cpp_strerror(*result) << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } send_rollback_object_map(); @@ -224,11 +224,11 @@ Context *SnapshotRollbackRequest::handle_rollback_objects(int *result) { if (*result == -ERESTART) { ldout(cct, 5) << "snapshot rollback operation interrupted" << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } else if (*result < 0) { lderr(cct) << "failed to rollback objects: " << cpp_strerror(*result) << dendl; - return this->create_context_finisher(); + return this->create_context_finisher(*result); } return send_refresh_object_map(); @@ -276,7 +276,7 @@ Context *SnapshotRollbackRequest::send_invalidate_cache() { apply(); if (image_ctx.object_cacher == NULL) { - return this->create_context_finisher(); + return this->create_context_finisher(0); } CephContext *cct = image_ctx.cct; @@ -300,7 +300,7 @@ Context *SnapshotRollbackRequest::handle_invalidate_cache(int *result) { lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result) << dendl; } - return this->create_context_finisher(); + return this->create_context_finisher(*result); } template diff --git a/src/test/librbd/mock/MockJournal.h b/src/test/librbd/mock/MockJournal.h index f8ef75ad86419..cfcb12c06ec15 100644 --- a/src/test/librbd/mock/MockJournal.h +++ b/src/test/librbd/mock/MockJournal.h @@ -55,7 +55,7 @@ struct MockJournal { append_op_event_mock(op_tid, event_entry, on_safe); } - MOCK_METHOD2(commit_op_event, void(uint64_t, int)); + MOCK_METHOD3(commit_op_event, void(uint64_t, int, Context *)); MOCK_METHOD2(replay_op_ready, void(uint64_t, Context *)); MOCK_METHOD2(add_listener, void(journal::ListenerType, diff --git a/src/test/librbd/operation/test_mock_ResizeRequest.cc b/src/test/librbd/operation/test_mock_ResizeRequest.cc index 36aaa3d39d3af..86bde5061e429 100644 --- a/src/test/librbd/operation/test_mock_ResizeRequest.cc +++ b/src/test/librbd/operation/test_mock_ResizeRequest.cc @@ -181,8 +181,8 @@ TEST_F(TestMockOperationResizeRequest, GrowSuccess) { expect_grow_object_map(mock_image_ctx); expect_block_writes(mock_image_ctx, 0); expect_update_header(mock_image_ctx, 0); - expect_commit_op_event(mock_image_ctx, 0); expect_unblock_writes(mock_image_ctx); + expect_commit_op_event(mock_image_ctx, 0); ASSERT_EQ(0, when_resize(mock_image_ctx, ictx->size * 2, true, 0, false)); } @@ -207,9 +207,9 @@ TEST_F(TestMockOperationResizeRequest, ShrinkSuccess) { expect_invalidate_cache(mock_image_ctx, 0); expect_block_writes(mock_image_ctx, 0); expect_update_header(mock_image_ctx, 0); - expect_commit_op_event(mock_image_ctx, 0); expect_shrink_object_map(mock_image_ctx); expect_unblock_writes(mock_image_ctx); + expect_commit_op_event(mock_image_ctx, 0); ASSERT_EQ(0, when_resize(mock_image_ctx, ictx->size / 2, true, 0, false)); } @@ -227,7 +227,6 @@ TEST_F(TestMockOperationResizeRequest, ShrinkError) { InSequence seq; expect_block_writes(mock_image_ctx, -EINVAL); expect_unblock_writes(mock_image_ctx); - ASSERT_EQ(-EINVAL, when_resize(mock_image_ctx, ictx->size / 2, false, 0, false)); } diff --git a/src/test/librbd/test_mock_fixture.cc b/src/test/librbd/test_mock_fixture.cc index 4cc940cad47d3..c2644eb534773 100644 --- a/src/test/librbd/test_mock_fixture.cc +++ b/src/test/librbd/test_mock_fixture.cc @@ -112,7 +112,8 @@ void TestMockFixture::expect_commit_op_event(librbd::MockImageCtx &mock_image_ct if (mock_image_ctx.journal != nullptr) { expect_is_journal_replaying(*mock_image_ctx.journal); expect_is_journal_ready(*mock_image_ctx.journal); - EXPECT_CALL(*mock_image_ctx.journal, commit_op_event(1U, r)); + EXPECT_CALL(*mock_image_ctx.journal, commit_op_event(1U, r, _)) + .WillOnce(WithArg<2>(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue))); } } From 11d7500b9bcda7b7c1d8756ade3373f404257f32 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Wed, 20 Jul 2016 08:11:53 -0400 Subject: [PATCH 10/16] librbd: new configuration option to restrict journal payload size Ensure that, by default, IO journal events are broken up into manageable sizes when factoring in that an rbd-mirror daemon might be replaying events from thousands of images. Signed-off-by: Jason Dillaman --- src/common/config_opts.h | 1 + src/librbd/ImageCtx.cc | 4 +++- src/librbd/ImageCtx.h | 1 + src/librbd/Journal.cc | 1 + src/test/librbd/mock/MockImageCtx.h | 4 +++- 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 04ac61832e588..289bbc2e35942 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -1248,6 +1248,7 @@ OPTION(rbd_journal_object_flush_interval, OPT_INT, 0) // maximum number of pendi OPTION(rbd_journal_object_flush_bytes, OPT_INT, 0) // maximum number of pending bytes per journal object OPTION(rbd_journal_object_flush_age, OPT_DOUBLE, 0) // maximum age (in seconds) for pending commits OPTION(rbd_journal_pool, OPT_STR, "") // pool for journal objects +OPTION(rbd_journal_max_payload_bytes, OPT_U32, 16384) // maximum journal payload size before splitting /** * RBD Mirror options diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 03a1e775e74ef..d0816e9864a97 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -928,7 +928,8 @@ struct C_InvalidateCache : public Context { "rbd_journal_object_flush_interval", false)( "rbd_journal_object_flush_bytes", false)( "rbd_journal_object_flush_age", false)( - "rbd_journal_pool", false); + "rbd_journal_pool", false)( + "rbd_journal_max_payload_bytes", false); md_config_t local_config_t; std::map res; @@ -983,6 +984,7 @@ struct C_InvalidateCache : public Context { ASSIGN_OPTION(journal_object_flush_bytes); ASSIGN_OPTION(journal_object_flush_age); ASSIGN_OPTION(journal_pool); + ASSIGN_OPTION(journal_max_payload_bytes); } ExclusiveLock *ImageCtx::create_exclusive_lock() { diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index f72d7ea68c98c..51d7a94af890b 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -182,6 +182,7 @@ namespace librbd { uint64_t journal_object_flush_bytes; double journal_object_flush_age; std::string journal_pool; + uint32_t journal_max_payload_bytes; LibrbdAdminSocketHook *asok_hook; diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc index cf3e3fd9848d1..671553f5b830d 100644 --- a/src/librbd/Journal.cc +++ b/src/librbd/Journal.cc @@ -1191,6 +1191,7 @@ void Journal::create_journaler() { transition_state(STATE_INITIALIZING, 0); ::journal::Settings settings; settings.commit_interval = m_image_ctx.journal_commit_age; + settings.max_payload_bytes = m_image_ctx.journal_max_payload_bytes; m_journaler = new Journaler(m_work_queue, m_timer, m_timer_lock, m_image_ctx.md_ctx, m_image_ctx.id, diff --git a/src/test/librbd/mock/MockImageCtx.h b/src/test/librbd/mock/MockImageCtx.h index 034db1c9f60ba..b7b73c835fa6f 100644 --- a/src/test/librbd/mock/MockImageCtx.h +++ b/src/test/librbd/mock/MockImageCtx.h @@ -86,7 +86,8 @@ struct MockImageCtx { journal_object_flush_interval(image_ctx.journal_object_flush_interval), journal_object_flush_bytes(image_ctx.journal_object_flush_bytes), journal_object_flush_age(image_ctx.journal_object_flush_age), - journal_pool(image_ctx.journal_pool) + journal_pool(image_ctx.journal_pool), + journal_max_payload_bytes(image_ctx.journal_max_payload_bytes) { md_ctx.dup(image_ctx.md_ctx); data_ctx.dup(image_ctx.data_ctx); @@ -240,6 +241,7 @@ struct MockImageCtx { uint64_t journal_object_flush_bytes; double journal_object_flush_age; std::string journal_pool; + uint32_t journal_max_payload_bytes; }; } // namespace librbd From 24883e0605907d1f9bcd1206c8a95c3bde30d5dc Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Tue, 19 Jul 2016 00:50:14 -0400 Subject: [PATCH 11/16] rbd-mirror: configuration options to control replay throttling Fixes: http://tracker.ceph.com/issues/16223 Signed-off-by: Jason Dillaman --- src/common/config_opts.h | 3 +++ src/test/rbd_mirror/test_ImageReplayer.cc | 1 + src/tools/rbd_mirror/ImageReplayer.cc | 9 ++++++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 289bbc2e35942..1fa688926f2ff 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -1253,6 +1253,9 @@ OPTION(rbd_journal_max_payload_bytes, OPT_U32, 16384) // maximum journal payload /** * RBD Mirror options */ +OPTION(rbd_mirror_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds +OPTION(rbd_mirror_journal_poll_age, OPT_DOUBLE, 5) // maximum age (in seconds) between successive journal polls +OPTION(rbd_mirror_journal_max_fetch_bytes, OPT_U32, 32768) // maximum bytes to read from each journal data object per fetch OPTION(rbd_mirror_sync_point_update_age, OPT_DOUBLE, 30) // number of seconds between each update of the image sync point object number OPTION(rbd_mirror_concurrent_image_syncs, OPT_U32, 5) // maximum number of image syncs in parallel diff --git a/src/test/rbd_mirror/test_ImageReplayer.cc b/src/test/rbd_mirror/test_ImageReplayer.cc index ca60340b64963..4e85be9b7dc5e 100644 --- a/src/test/rbd_mirror/test_ImageReplayer.cc +++ b/src/test/rbd_mirror/test_ImageReplayer.cc @@ -78,6 +78,7 @@ class TestImageReplayer : public ::testing::Test { { EXPECT_EQ("", connect_cluster_pp(*m_local_cluster.get())); EXPECT_EQ(0, m_local_cluster->conf_set("rbd_cache", "false")); + EXPECT_EQ(0, m_local_cluster->conf_set("rbd_mirror_journal_poll_age", "1")); m_local_pool_name = get_temp_pool_name(); EXPECT_EQ(0, m_local_cluster->pool_create(m_local_pool_name.c_str())); diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index c2b25d5e379e5..e3c2c728c621f 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -374,7 +374,8 @@ void ImageReplayer::start(Context *on_finish, bool manual) CephContext *cct = static_cast(m_local->cct()); journal::Settings settings; - settings.commit_interval = cct->_conf->rbd_journal_commit_age; + settings.commit_interval = cct->_conf->rbd_mirror_journal_commit_age; + settings.max_fetch_bytes = cct->_conf->rbd_mirror_journal_max_fetch_bytes; m_remote_journaler = new Journaler(m_threads->work_queue, m_threads->timer, @@ -564,10 +565,12 @@ void ImageReplayer::handle_start_replay(int r) { } { + CephContext *cct = static_cast(m_local->cct()); + double poll_seconds = cct->_conf->rbd_mirror_journal_poll_age; + Mutex::Locker locker(m_lock); m_replay_handler = new ReplayHandler(this); - m_remote_journaler->start_live_replay(m_replay_handler, - 1 /* TODO: configurable */); + m_remote_journaler->start_live_replay(m_replay_handler, poll_seconds); dout(20) << "m_remote_journaler=" << *m_remote_journaler << dendl; } From 73cdd08007c27d2c3c41fe644601e7a144f21c82 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Tue, 19 Jul 2016 13:50:20 -0400 Subject: [PATCH 12/16] rbd-mirror: shut down image replayers in parallel When multiple pools are being replicated, start the shut down process concurrently across all pool replayers. Signed-off-by: Jason Dillaman --- src/tools/rbd_mirror/ImageReplayer.cc | 2 +- src/tools/rbd_mirror/Mirror.cc | 9 ++++++++- src/tools/rbd_mirror/Replayer.cc | 16 +++++++++------- src/tools/rbd_mirror/Replayer.h | 2 +- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index e3c2c728c621f..801a2d03d0bf0 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -660,7 +660,7 @@ void ImageReplayer::stop(Context *on_finish, bool manual) } if (!running) { - derr << "not running" << dendl; + dout(20) << "not running" << dendl; if (on_finish) { on_finish->complete(-EINVAL); } diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc index 7add4d09b34ba..de8e37851a164 100644 --- a/src/tools/rbd_mirror/Mirror.cc +++ b/src/tools/rbd_mirror/Mirror.cc @@ -239,6 +239,13 @@ void Mirror::run() // TODO: make interval configurable m_cond.WaitInterval(g_ceph_context, m_lock, seconds(30)); } + + // stop all replayers in parallel + Mutex::Locker locker(m_lock); + for (auto it = m_replayers.begin(); it != m_replayers.end(); it++) { + auto &replayer = it->second; + replayer->stop(false); + } dout(20) << "return" << dendl; } @@ -313,7 +320,7 @@ void Mirror::stop() for (auto it = m_replayers.begin(); it != m_replayers.end(); it++) { auto &replayer = it->second; - replayer->stop(); + replayer->stop(true); } } diff --git a/src/tools/rbd_mirror/Replayer.cc b/src/tools/rbd_mirror/Replayer.cc index 9cefd78edeaa0..fd13a8dec61b6 100644 --- a/src/tools/rbd_mirror/Replayer.cc +++ b/src/tools/rbd_mirror/Replayer.cc @@ -73,7 +73,7 @@ class StopCommand : public ReplayerAdminSocketCommand { explicit StopCommand(Replayer *replayer) : replayer(replayer) {} bool call(Formatter *f, stringstream *ss) { - replayer->stop(); + replayer->stop(true); return true; } @@ -513,18 +513,20 @@ void Replayer::start() } } -void Replayer::stop() +void Replayer::stop(bool manual) { - dout(20) << "enter" << dendl; + dout(20) << "enter: manual=" << manual << dendl; Mutex::Locker l(m_lock); - - if (m_stopping.read()) { + if (!manual) { + m_stopping.set(1); + m_cond.Signal(); + return; + } else if (m_stopping.read()) { return; } m_manual_stop = true; - for (auto &kv : m_image_replayers) { auto &image_replayer = kv.second; image_replayer->stop(nullptr, true); @@ -767,7 +769,7 @@ bool Replayer::stop_image_replayer(unique_ptr > &image_replayer) } FunctionContext *ctx = new FunctionContext( [&image_replayer, this] (int r) { - if (!m_stopping.read()) { + if (!m_stopping.read() && r >= 0) { m_image_deleter->schedule_image_delete( m_local_rados, image_replayer->get_local_pool_id(), diff --git a/src/tools/rbd_mirror/Replayer.h b/src/tools/rbd_mirror/Replayer.h index c5b975e26bba4..81db162220234 100644 --- a/src/tools/rbd_mirror/Replayer.h +++ b/src/tools/rbd_mirror/Replayer.h @@ -48,7 +48,7 @@ class Replayer { void print_status(Formatter *f, stringstream *ss); void start(); - void stop(); + void stop(bool manual); void restart(); void flush(); From 0275c7ca23b27dc5250cd33f317e2273470a9fe8 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Tue, 19 Jul 2016 15:42:27 -0400 Subject: [PATCH 13/16] rbd-mirror: fix issues detected when attempting clean shut down Fixed lockdep issue from status update callback and fixed the potential for a stuck status state. Signed-off-by: Jason Dillaman --- src/tools/rbd_mirror/ImageReplayer.cc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index 801a2d03d0bf0..151f7130421ab 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -633,6 +633,7 @@ void ImageReplayer::stop(Context *on_finish, bool manual) { dout(20) << "on_finish=" << on_finish << dendl; + image_replayer::BootstrapRequest *bootstrap_request = nullptr; bool shut_down_replay = false; bool running = true; { @@ -644,7 +645,8 @@ void ImageReplayer::stop(Context *on_finish, bool manual) if (m_state == STATE_STARTING) { dout(20) << "canceling start" << dendl; if (m_bootstrap_request) { - m_bootstrap_request->cancel(); + bootstrap_request = m_bootstrap_request; + bootstrap_request->get(); } } else { dout(20) << "interrupting replay" << dendl; @@ -659,6 +661,12 @@ void ImageReplayer::stop(Context *on_finish, bool manual) } } + // avoid holding lock since bootstrap request will update status + if (bootstrap_request != nullptr) { + bootstrap_request->cancel(); + bootstrap_request->put(); + } + if (!running) { dout(20) << "not running" << dendl; if (on_finish) { @@ -1173,9 +1181,12 @@ void ImageReplayer::send_mirror_status_update(const OptionalState &opt_state) { Context *on_req_finish = new FunctionContext( [this](int r) { + dout(20) << "replay status ready: r=" << r << dendl; if (r >= 0) { - dout(20) << "replay status ready" << dendl; send_mirror_status_update(boost::none); + } else if (r == -EAGAIN) { + // decrement in-flight status update counter + handle_mirror_status_update(r); } }); From e6cdf955bad500561ddada2791641eba5fb27762 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Tue, 19 Jul 2016 15:46:49 -0400 Subject: [PATCH 14/16] rbd-mirror: potential memory leak when attempting to cancel image sync The cancel request could race with the actual scheduling of the image sync operation. Signed-off-by: Jason Dillaman --- .../image_replayer/BootstrapRequest.cc | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc index f471626e80f14..0028dcc29b514 100644 --- a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc @@ -549,12 +549,22 @@ void BootstrapRequest::image_sync() { BootstrapRequest, &BootstrapRequest::handle_image_sync>( this); - m_image_sync_throttler->start_sync(*m_local_image_ctx, - m_remote_image_ctx, m_timer, - m_timer_lock, - m_local_mirror_uuid, m_journaler, - m_client_meta, m_work_queue, ctx, - m_progress_ctx); + { + Mutex::Locker locker(m_lock); + if (!m_canceled) { + m_image_sync_throttler->start_sync(*m_local_image_ctx, + m_remote_image_ctx, m_timer, + m_timer_lock, + m_local_mirror_uuid, m_journaler, + m_client_meta, m_work_queue, ctx, + m_progress_ctx); + return; + } + } + + dout(10) << ": request canceled" << dendl; + m_ret_val = -ECANCELED; + close_remote_image(); } template From 862e581553fff510286b58135a1fd69705c06096 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 21 Jul 2016 07:28:54 -0400 Subject: [PATCH 15/16] rbd-mirror: do not cancel maintenance ops with missing finish events librbd will replay these ops when opening an image, so rbd-mirror should also ensure these ops are replayed. Signed-off-by: Jason Dillaman --- src/tools/rbd_mirror/ImageReplayer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index 151f7130421ab..30988700be6e2 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -895,7 +895,7 @@ void ImageReplayer::replay_flush() { ImageReplayer, &ImageReplayer::handle_stop_replay_request>(this); m_local_journal->start_external_replay(&m_local_replay, ctx, stop_ctx); }); - m_local_replay->shut_down(true, ctx); + m_local_replay->shut_down(false, ctx); } template From 574be7486ad737892422aed0322f80e5750a75a0 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Wed, 20 Jul 2016 16:18:23 -0400 Subject: [PATCH 16/16] qa/workunits/rbd: override rbd-mirror integration test poll frequency Signed-off-by: Jason Dillaman --- qa/workunits/rbd/rbd_mirror_helpers.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh index 2c22327e1d7cd..e276be53b1285 100755 --- a/qa/workunits/rbd/rbd_mirror_helpers.sh +++ b/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -197,6 +197,7 @@ start_mirror() --pid-file=$(daemon_pid_file "${cluster}") \ --log-file=${TEMPDIR}/rbd-mirror.${cluster}_daemon.\$cluster.\$pid.log \ --admin-socket=${TEMPDIR}/rbd-mirror.${cluster}_daemon.\$cluster.asok \ + --rbd-mirror-journal-poll-age=1 \ --debug-rbd=30 --debug-journaler=30 \ --debug-rbd_mirror=30 \ --daemonize=true