diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc index 1e892acb79473..e4044fa82a544 100644 --- a/src/librbd/AioCompletion.cc +++ b/src/librbd/AioCompletion.cc @@ -9,7 +9,6 @@ #include "common/perf_counters.h" #include "common/WorkQueue.h" -#include "librbd/AioObjectRequest.h" #include "librbd/ImageCtx.h" #include "librbd/internal.h" @@ -28,239 +27,198 @@ namespace librbd { - int AioCompletion::wait_for_complete() { - tracepoint(librbd, aio_wait_for_complete_enter, this); - lock.Lock(); - while (state != STATE_COMPLETE) - cond.Wait(lock); - lock.Unlock(); - tracepoint(librbd, aio_wait_for_complete_exit, 0); - return 0; - } +int AioCompletion::wait_for_complete() { + tracepoint(librbd, aio_wait_for_complete_enter, this); + lock.Lock(); + while (state != STATE_COMPLETE) + cond.Wait(lock); + lock.Unlock(); + tracepoint(librbd, aio_wait_for_complete_exit, 0); + return 0; +} - void AioCompletion::finalize(ssize_t rval) - { - assert(lock.is_locked()); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; - - ldout(cct, 20) << this << " " << __func__ << ": r=" << rval << ", " - << "read_buf=" << reinterpret_cast(read_buf) << ", " - << "real_bl=" << reinterpret_cast(read_bl) << dendl; - if (rval >= 0 && aio_type == AIO_TYPE_READ) { - if (read_buf && !read_bl) { - destriper.assemble_result(cct, read_buf, read_buf_len); - } else { - // FIXME: make the destriper write directly into a buffer so - // that we avoid shuffling pointers and copying zeros around. - bufferlist bl; - destriper.assemble_result(cct, bl, true); - - if (read_buf) { - assert(bl.length() == read_buf_len); - bl.copy(0, read_buf_len, read_buf); - ldout(cct, 20) << "copied resulting " << bl.length() - << " bytes to " << (void*)read_buf << dendl; - } - if (read_bl) { - ldout(cct, 20) << " moving resulting " << bl.length() - << " bytes to bl " << (void*)read_bl << dendl; - read_bl->claim(bl); - } +void AioCompletion::finalize(ssize_t rval) +{ + assert(lock.is_locked()); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; + + ldout(cct, 20) << this << " " << __func__ << ": r=" << rval << ", " + << "read_buf=" << reinterpret_cast(read_buf) << ", " + << "real_bl=" << reinterpret_cast(read_bl) << dendl; + if (rval >= 0 && aio_type == AIO_TYPE_READ) { + if (read_buf && !read_bl) { + destriper.assemble_result(cct, read_buf, read_buf_len); + } else { + // FIXME: make the destriper write directly into a buffer so + // that we avoid shuffling pointers and copying zeros around. + bufferlist bl; + destriper.assemble_result(cct, bl, true); + + if (read_buf) { + assert(bl.length() == read_buf_len); + bl.copy(0, read_buf_len, read_buf); + ldout(cct, 20) << "copied resulting " << bl.length() + << " bytes to " << (void*)read_buf << dendl; + } + if (read_bl) { + ldout(cct, 20) << " moving resulting " << bl.length() + << " bytes to bl " << (void*)read_bl << dendl; + read_bl->claim(bl); } } } +} - void AioCompletion::complete() { - assert(lock.is_locked()); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; - - tracepoint(librbd, aio_complete_enter, this, rval); - utime_t elapsed; - elapsed = ceph_clock_now(cct) - start_time; - switch (aio_type) { - case AIO_TYPE_OPEN: - case AIO_TYPE_CLOSE: - break; - case AIO_TYPE_READ: - ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); break; - case AIO_TYPE_WRITE: - ictx->perfcounter->tinc(l_librbd_wr_latency, elapsed); break; - case AIO_TYPE_DISCARD: - ictx->perfcounter->tinc(l_librbd_discard_latency, elapsed); break; - case AIO_TYPE_FLUSH: - ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break; - default: - lderr(cct) << "completed invalid aio_type: " << aio_type << dendl; - break; - } - - // inform the journal that the op has successfully committed - if (journal_tid != 0) { - assert(ictx->journal != NULL); - ictx->journal->commit_io_event(journal_tid, rval); - } - - state = STATE_CALLBACK; - if (complete_cb) { - lock.Unlock(); - complete_cb(rbd_comp, complete_arg); - lock.Lock(); - } - - if (ictx && event_notify && ictx->event_socket.is_valid()) { - ictx->completed_reqs_lock.Lock(); - ictx->completed_reqs.push_back(&m_xlist_item); - ictx->completed_reqs_lock.Unlock(); - ictx->event_socket.notify(); - } - - state = STATE_COMPLETE; - cond.Signal(); - - // note: possible for image to be closed after op marked finished - if (async_op.started()) { - async_op.finish_op(); - } - tracepoint(librbd, aio_complete_exit); - } - - void AioCompletion::init_time(ImageCtx *i, aio_type_t t) { - Mutex::Locker locker(lock); - if (ictx == nullptr) { - ictx = i; - aio_type = t; - start_time = ceph_clock_now(ictx->cct); - } +void AioCompletion::complete() { + assert(lock.is_locked()); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; + + tracepoint(librbd, aio_complete_enter, this, rval); + utime_t elapsed; + elapsed = ceph_clock_now(cct) - start_time; + switch (aio_type) { + case AIO_TYPE_OPEN: + case AIO_TYPE_CLOSE: + break; + case AIO_TYPE_READ: + ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); break; + case AIO_TYPE_WRITE: + ictx->perfcounter->tinc(l_librbd_wr_latency, elapsed); break; + case AIO_TYPE_DISCARD: + ictx->perfcounter->tinc(l_librbd_discard_latency, elapsed); break; + case AIO_TYPE_FLUSH: + ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break; + default: + lderr(cct) << "completed invalid aio_type: " << aio_type << dendl; + break; } - void AioCompletion::start_op(bool ignore_type) { - Mutex::Locker locker(lock); - assert(ictx != nullptr); - assert(!async_op.started()); - if (state == STATE_PENDING && (ignore_type || aio_type != AIO_TYPE_FLUSH)) { - async_op.start_op(*ictx); - } + // inform the journal that the op has successfully committed + if (journal_tid != 0) { + assert(ictx->journal != NULL); + ictx->journal->commit_io_event(journal_tid, rval); } - void AioCompletion::fail(int r) - { + state = STATE_CALLBACK; + if (complete_cb) { + lock.Unlock(); + complete_cb(rbd_comp, complete_arg); lock.Lock(); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; - - lderr(cct) << this << " " << __func__ << ": " << cpp_strerror(r) - << dendl; - assert(pending_count == 0); - rval = r; - complete(); - put_unlock(); } - void AioCompletion::set_request_count(uint32_t count) { - lock.Lock(); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; + if (ictx && event_notify && ictx->event_socket.is_valid()) { + ictx->completed_reqs_lock.Lock(); + ictx->completed_reqs.push_back(&m_xlist_item); + ictx->completed_reqs_lock.Unlock(); + ictx->event_socket.notify(); + } - ldout(cct, 20) << this << " " << __func__ << ": pending=" << count << dendl; - assert(pending_count == 0); - pending_count = count; - lock.Unlock(); + state = STATE_COMPLETE; + cond.Signal(); - // if no pending requests, completion will fire now - unblock(); + // note: possible for image to be closed after op marked finished + if (async_op.started()) { + async_op.finish_op(); } + tracepoint(librbd, aio_complete_exit); +} - void AioCompletion::complete_request(ssize_t r) - { - lock.Lock(); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; - - if (rval >= 0) { - if (r < 0 && r != -EEXIST) - rval = r; - else if (r > 0) - rval += r; - } - assert(pending_count); - int count = --pending_count; - - ldout(cct, 20) << this << " " << __func__ << ": cb=" << complete_cb << ", " - << "pending=" << pending_count << dendl; - if (!count && blockers == 0) { - finalize(rval); - complete(); - } - put_unlock(); +void AioCompletion::init_time(ImageCtx *i, aio_type_t t) { + Mutex::Locker locker(lock); + if (ictx == nullptr) { + ictx = i; + aio_type = t; + start_time = ceph_clock_now(ictx->cct); } +} - void AioCompletion::associate_journal_event(uint64_t tid) { - Mutex::Locker l(lock); - assert(state == STATE_PENDING); - journal_tid = tid; +void AioCompletion::start_op(bool ignore_type) { + Mutex::Locker locker(lock); + assert(ictx != nullptr); + assert(!async_op.started()); + if (state == STATE_PENDING && (ignore_type || aio_type != AIO_TYPE_FLUSH)) { + async_op.start_op(*ictx); } +} - bool AioCompletion::is_complete() { - tracepoint(librbd, aio_is_complete_enter, this); - bool done; - { - Mutex::Locker l(lock); - done = this->state == STATE_COMPLETE; - } - tracepoint(librbd, aio_is_complete_exit, done); - return done; - } +void AioCompletion::fail(int r) +{ + lock.Lock(); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; + + lderr(cct) << this << " " << __func__ << ": " << cpp_strerror(r) + << dendl; + assert(pending_count == 0); + rval = r; + complete(); + put_unlock(); +} - ssize_t AioCompletion::get_return_value() { - tracepoint(librbd, aio_get_return_value_enter, this); - lock.Lock(); - ssize_t r = rval; - lock.Unlock(); - tracepoint(librbd, aio_get_return_value_exit, r); - return r; - } +void AioCompletion::set_request_count(uint32_t count) { + lock.Lock(); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; - void C_AioRead::finish(int r) - { - m_completion->lock.Lock(); - CephContext *cct = m_completion->ictx->cct; - ldout(cct, 10) << "C_AioRead::finish() " << this << " r = " << r << dendl; - - if (r >= 0 || r == -ENOENT) { // this was a sparse_read operation - ldout(cct, 10) << " got " << m_req->m_ext_map - << " for " << m_req->m_buffer_extents - << " bl " << m_req->data().length() << dendl; - // reads from the parent don't populate the m_ext_map and the overlap - // may not be the full buffer. compensate here by filling in m_ext_map - // with the read extent when it is empty. - if (m_req->m_ext_map.empty()) - m_req->m_ext_map[m_req->m_object_off] = m_req->data().length(); - - m_completion->destriper.add_partial_sparse_result( - cct, m_req->data(), m_req->m_ext_map, m_req->m_object_off, - m_req->m_buffer_extents); - r = m_req->m_object_len; - } - m_completion->lock.Unlock(); + ldout(cct, 20) << this << " " << __func__ << ": pending=" << count << dendl; + assert(pending_count == 0); + pending_count = count; + lock.Unlock(); + + // if no pending requests, completion will fire now + unblock(); +} - C_AioRequest::finish(r); +void AioCompletion::complete_request(ssize_t r) +{ + lock.Lock(); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; + + if (rval >= 0) { + if (r < 0 && r != -EEXIST) + rval = r; + else if (r > 0) + rval += r; } + assert(pending_count); + int count = --pending_count; - void C_CacheRead::complete(int r) { - if (!m_enqueued) { - // cache_lock creates a lock ordering issue -- so re-execute this context - // outside the cache_lock - m_enqueued = true; - m_image_ctx.op_work_queue->queue(this, r); - return; - } - Context::complete(r); + ldout(cct, 20) << this << " " << __func__ << ": cb=" << complete_cb << ", " + << "pending=" << pending_count << dendl; + if (!count && blockers == 0) { + finalize(rval); + complete(); } + put_unlock(); +} - void C_CacheRead::finish(int r) +void AioCompletion::associate_journal_event(uint64_t tid) { + Mutex::Locker l(lock); + assert(state == STATE_PENDING); + journal_tid = tid; +} + +bool AioCompletion::is_complete() { + tracepoint(librbd, aio_is_complete_enter, this); + bool done; { - m_req->complete(r); + Mutex::Locker l(lock); + done = this->state == STATE_COMPLETE; } + tracepoint(librbd, aio_is_complete_exit, done); + return done; } + +ssize_t AioCompletion::get_return_value() { + tracepoint(librbd, aio_get_return_value_enter, this); + lock.Lock(); + ssize_t r = rval; + lock.Unlock(); + tracepoint(librbd, aio_get_return_value_exit, r); + return r; +} + +} // namespace librbd diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h index e259a484698a4..6a83677cd964b 100644 --- a/src/librbd/AioCompletion.h +++ b/src/librbd/AioCompletion.h @@ -1,7 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_AIOCOMPLETION_H -#define CEPH_LIBRBD_AIOCOMPLETION_H + +#ifndef CEPH_LIBRBD_AIO_COMPLETION_H +#define CEPH_LIBRBD_AIO_COMPLETION_H #include "common/Cond.h" #include "common/Mutex.h" @@ -18,248 +19,220 @@ class CephContext; namespace librbd { - class AioObjectRead; - - typedef enum { - AIO_TYPE_NONE = 0, - AIO_TYPE_OPEN, - AIO_TYPE_CLOSE, - AIO_TYPE_READ, - AIO_TYPE_WRITE, - AIO_TYPE_DISCARD, - AIO_TYPE_FLUSH, - } aio_type_t; - - typedef enum { - STATE_PENDING = 0, - STATE_CALLBACK, - STATE_COMPLETE, - } aio_state_t; - - /** - * AioCompletion is the overall completion for a single - * rbd I/O request. It may be composed of many AioObjectRequests, - * which each go to a single object. - * - * The retrying of individual requests is handled at a lower level, - * so all AioCompletion cares about is the count of outstanding - * requests. The number of expected individual requests should be - * set initially using set_request_count() prior to issuing the - * requests. This ensures that the completion will not be completed - * within the caller's thread of execution (instead via a librados - * context or via a thread pool context for cache read hits). - */ - struct AioCompletion { - mutable Mutex lock; - Cond cond; - aio_state_t state; - ssize_t rval; - callback_t complete_cb; - void *complete_arg; - rbd_completion_t rbd_comp; - uint32_t pending_count; ///< number of requests - uint32_t blockers; - int ref; - bool released; - ImageCtx *ictx; - utime_t start_time; - aio_type_t aio_type; - - Striper::StripedReadResult destriper; - bufferlist *read_bl; - char *read_buf; - size_t read_buf_len; - - AsyncOperation async_op; - - uint64_t journal_tid; - xlist::item m_xlist_item; - bool event_notify; - - template - static void callback_adapter(completion_t cb, void *arg) { - AioCompletion *comp = reinterpret_cast(cb); - T *t = reinterpret_cast(arg); - (t->*MF)(comp->get_return_value()); - comp->release(); - } - - static AioCompletion *create(void *cb_arg, callback_t cb_complete, - rbd_completion_t rbd_comp) { - AioCompletion *comp = new AioCompletion(); - comp->set_complete_cb(cb_arg, cb_complete); - comp->rbd_comp = (rbd_comp != nullptr ? rbd_comp : comp); - return comp; - } - - template - static AioCompletion *create(T *obj) { - AioCompletion *comp = new AioCompletion(); - comp->set_complete_cb(obj, &callback_adapter); - comp->rbd_comp = comp; - return comp; - } - - template - static AioCompletion *create_and_start(T *obj, ImageCtx *image_ctx, - aio_type_t type) { - AioCompletion *comp = create(obj); - comp->init_time(image_ctx, type); - comp->start_op(); - return comp; - } - - AioCompletion() : lock("AioCompletion::lock", true, false), - state(STATE_PENDING), rval(0), complete_cb(NULL), - complete_arg(NULL), rbd_comp(NULL), - pending_count(0), blockers(1), - ref(1), released(false), ictx(NULL), - aio_type(AIO_TYPE_NONE), - read_bl(NULL), read_buf(NULL), read_buf_len(0), - journal_tid(0), - m_xlist_item(this), event_notify(false) { - } - ~AioCompletion() { - } - - int wait_for_complete(); - - void finalize(ssize_t rval); - - inline bool is_initialized(aio_type_t type) const { - Mutex::Locker locker(lock); - return ((ictx != nullptr) && (aio_type == type)); - } - inline bool is_started() const { - Mutex::Locker locker(lock); - return async_op.started(); - } - - void init_time(ImageCtx *i, aio_type_t t); - void start_op(bool ignore_type = false); - void fail(int r); - - void complete(); - - void set_complete_cb(void *cb_arg, callback_t cb) { - complete_cb = cb; - complete_arg = cb_arg; - } - - void set_request_count(uint32_t num); - void add_request() { - lock.Lock(); - assert(pending_count > 0); - lock.Unlock(); - get(); - } - void complete_request(ssize_t r); - - void associate_journal_event(uint64_t tid); - - bool is_complete(); - - ssize_t get_return_value(); - - void get() { - lock.Lock(); - assert(ref > 0); - ref++; - lock.Unlock(); - } - void release() { - lock.Lock(); - assert(!released); - released = true; - put_unlock(); - } - void put() { - lock.Lock(); - put_unlock(); - } - void put_unlock() { - assert(ref > 0); - int n = --ref; - lock.Unlock(); - if (!n) { - if (ictx) { - if (event_notify) { - ictx->completed_reqs_lock.Lock(); - m_xlist_item.remove_myself(); - ictx->completed_reqs_lock.Unlock(); - } - if (aio_type == AIO_TYPE_CLOSE || (aio_type == AIO_TYPE_OPEN && - rval < 0)) { - delete ictx; - } - } - delete this; - } - } - - void block() { - Mutex::Locker l(lock); - ++blockers; - } - void unblock() { - Mutex::Locker l(lock); - assert(blockers > 0); - --blockers; - if (pending_count == 0 && blockers == 0) { - finalize(rval); - complete(); +typedef enum { + AIO_TYPE_NONE = 0, + AIO_TYPE_OPEN, + AIO_TYPE_CLOSE, + AIO_TYPE_READ, + AIO_TYPE_WRITE, + AIO_TYPE_DISCARD, + AIO_TYPE_FLUSH, +} aio_type_t; + +typedef enum { + STATE_PENDING = 0, + STATE_CALLBACK, + STATE_COMPLETE, +} aio_state_t; + +/** + * AioCompletion is the overall completion for a single + * rbd I/O request. It may be composed of many AioObjectRequests, + * which each go to a single object. + * + * The retrying of individual requests is handled at a lower level, + * so all AioCompletion cares about is the count of outstanding + * requests. The number of expected individual requests should be + * set initially using set_request_count() prior to issuing the + * requests. This ensures that the completion will not be completed + * within the caller's thread of execution (instead via a librados + * context or via a thread pool context for cache read hits). + */ +struct AioCompletion { + mutable Mutex lock; + Cond cond; + aio_state_t state; + ssize_t rval; + callback_t complete_cb; + void *complete_arg; + rbd_completion_t rbd_comp; + uint32_t pending_count; ///< number of requests + uint32_t blockers; + int ref; + bool released; + ImageCtx *ictx; + utime_t start_time; + aio_type_t aio_type; + + Striper::StripedReadResult destriper; + bufferlist *read_bl; + char *read_buf; + size_t read_buf_len; + + AsyncOperation async_op; + + uint64_t journal_tid; + xlist::item m_xlist_item; + bool event_notify; + + template + static void callback_adapter(completion_t cb, void *arg) { + AioCompletion *comp = reinterpret_cast(cb); + T *t = reinterpret_cast(arg); + (t->*MF)(comp->get_return_value()); + comp->release(); + } + + static AioCompletion *create(void *cb_arg, callback_t cb_complete, + rbd_completion_t rbd_comp) { + AioCompletion *comp = new AioCompletion(); + comp->set_complete_cb(cb_arg, cb_complete); + comp->rbd_comp = (rbd_comp != nullptr ? rbd_comp : comp); + return comp; + } + + template + static AioCompletion *create(T *obj) { + AioCompletion *comp = new AioCompletion(); + comp->set_complete_cb(obj, &callback_adapter); + comp->rbd_comp = comp; + return comp; + } + + template + static AioCompletion *create_and_start(T *obj, ImageCtx *image_ctx, + aio_type_t type) { + AioCompletion *comp = create(obj); + comp->init_time(image_ctx, type); + comp->start_op(); + return comp; + } + + AioCompletion() : lock("AioCompletion::lock", true, false), + state(STATE_PENDING), rval(0), complete_cb(NULL), + complete_arg(NULL), rbd_comp(NULL), + pending_count(0), blockers(1), + ref(1), released(false), ictx(NULL), + aio_type(AIO_TYPE_NONE), + read_bl(NULL), read_buf(NULL), read_buf_len(0), + journal_tid(0), m_xlist_item(this), event_notify(false) { + } + + ~AioCompletion() { + } + + int wait_for_complete(); + + void finalize(ssize_t rval); + + inline bool is_initialized(aio_type_t type) const { + Mutex::Locker locker(lock); + return ((ictx != nullptr) && (aio_type == type)); + } + inline bool is_started() const { + Mutex::Locker locker(lock); + return async_op.started(); + } + + void init_time(ImageCtx *i, aio_type_t t); + void start_op(bool ignore_type = false); + void fail(int r); + + void complete(); + + void set_complete_cb(void *cb_arg, callback_t cb) { + complete_cb = cb; + complete_arg = cb_arg; + } + + void set_request_count(uint32_t num); + void add_request() { + lock.Lock(); + assert(pending_count > 0); + lock.Unlock(); + get(); + } + void complete_request(ssize_t r); + + void associate_journal_event(uint64_t tid); + + bool is_complete(); + + ssize_t get_return_value(); + + void get() { + lock.Lock(); + assert(ref > 0); + ref++; + lock.Unlock(); + } + void release() { + lock.Lock(); + assert(!released); + released = true; + put_unlock(); + } + void put() { + lock.Lock(); + put_unlock(); + } + void put_unlock() { + assert(ref > 0); + int n = --ref; + lock.Unlock(); + if (!n) { + if (ictx) { + if (event_notify) { + ictx->completed_reqs_lock.Lock(); + m_xlist_item.remove_myself(); + ictx->completed_reqs_lock.Unlock(); + } + if (aio_type == AIO_TYPE_CLOSE || + (aio_type == AIO_TYPE_OPEN && rval < 0)) { + delete ictx; + } } - } - - void set_event_notify(bool s) { - Mutex::Locker l(lock); - event_notify = s; - } - - void *get_arg() { - return complete_arg; - } - }; - - class C_AioRequest : public Context { - public: - C_AioRequest(AioCompletion *completion) : m_completion(completion) { - m_completion->add_request(); - } - virtual ~C_AioRequest() {} - virtual void finish(int r) { - m_completion->complete_request(r); - } - protected: - AioCompletion *m_completion; - }; - - class C_AioRead : public C_AioRequest { - public: - C_AioRead(AioCompletion *completion) - : C_AioRequest(completion), m_req(nullptr) { - } - virtual ~C_AioRead() {} - virtual void finish(int r); - void set_req(AioObjectRead *req) { - m_req = req; - } - private: - AioObjectRead *m_req; - }; - - class C_CacheRead : public Context { - public: - explicit C_CacheRead(ImageCtx *ictx, AioObjectRead *req) - : m_image_ctx(*ictx), m_req(req), m_enqueued(false) {} - virtual void complete(int r); - protected: - virtual void finish(int r); - private: - ImageCtx &m_image_ctx; - AioObjectRead *m_req; - bool m_enqueued; - }; -} - -#endif + delete this; + } + } + + void block() { + Mutex::Locker l(lock); + ++blockers; + } + void unblock() { + Mutex::Locker l(lock); + assert(blockers > 0); + --blockers; + if (pending_count == 0 && blockers == 0) { + finalize(rval); + complete(); + } + } + + void set_event_notify(bool s) { + Mutex::Locker l(lock); + event_notify = s; + } + + void *get_arg() { + return complete_arg; + } +}; + +class C_AioRequest : public Context { +public: + C_AioRequest(AioCompletion *completion) : m_completion(completion) { + m_completion->add_request(); + } + virtual ~C_AioRequest() {} + virtual void finish(int r) { + m_completion->complete_request(r); + } +protected: + AioCompletion *m_completion; +}; + +} // namespace librbd + +#endif // CEPH_LIBRBD_AIO_COMPLETION_H diff --git a/src/librbd/AioImageRequest.cc b/src/librbd/AioImageRequest.cc index 1c2a4b140c0f9..dd1e26aa49fe2 100644 --- a/src/librbd/AioImageRequest.cc +++ b/src/librbd/AioImageRequest.cc @@ -7,8 +7,10 @@ #include "librbd/ImageCtx.h" #include "librbd/internal.h" #include "librbd/Journal.h" +#include "librbd/Utils.h" #include "librbd/journal/Types.h" #include "include/rados/librados.hpp" +#include "common/WorkQueue.h" #include "osdc/Striper.h" #define dout_subsys ceph_subsys_rbd @@ -17,16 +19,19 @@ namespace librbd { +using util::get_image_ctx; + namespace { +template struct C_DiscardJournalCommit : public Context { typedef std::vector ObjectExtents; - ImageCtx &image_ctx; + ImageCtxT &image_ctx; AioCompletion *aio_comp; ObjectExtents object_extents; - C_DiscardJournalCommit(ImageCtx &_image_ctx, AioCompletion *_aio_comp, + C_DiscardJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp, const ObjectExtents &_object_extents, uint64_t tid) : image_ctx(_image_ctx), aio_comp(_aio_comp), object_extents(_object_extents) { @@ -49,11 +54,12 @@ struct C_DiscardJournalCommit : public Context { } }; +template struct C_FlushJournalCommit : public Context { - ImageCtx &image_ctx; + ImageCtxT &image_ctx; AioCompletion *aio_comp; - C_FlushJournalCommit(ImageCtx &_image_ctx, AioCompletion *_aio_comp, + C_FlushJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp, uint64_t tid) : image_ctx(_image_ctx), aio_comp(_aio_comp) { CephContext *cct = image_ctx.cct; @@ -72,6 +78,74 @@ struct C_FlushJournalCommit : public Context { } }; +template +class C_AioRead : public C_AioRequest { +public: + C_AioRead(AioCompletion *completion) + : C_AioRequest(completion), m_req(nullptr) { + } + + virtual void finish(int r) { + m_completion->lock.Lock(); + CephContext *cct = m_completion->ictx->cct; + ldout(cct, 10) << "C_AioRead::finish() " << this << " r = " << r << dendl; + + if (r >= 0 || r == -ENOENT) { // this was a sparse_read operation + ldout(cct, 10) << " got " << m_req->get_extent_map() + << " for " << m_req->get_buffer_extents() + << " bl " << m_req->data().length() << dendl; + // reads from the parent don't populate the m_ext_map and the overlap + // may not be the full buffer. compensate here by filling in m_ext_map + // with the read extent when it is empty. + if (m_req->get_extent_map().empty()) { + m_req->get_extent_map()[m_req->get_offset()] = m_req->data().length(); + } + + m_completion->destriper.add_partial_sparse_result( + cct, m_req->data(), m_req->get_extent_map(), m_req->get_offset(), + m_req->get_buffer_extents()); + r = m_req->get_length(); + } + m_completion->lock.Unlock(); + + C_AioRequest::finish(r); + } + + void set_req(AioObjectRead *req) { + m_req = req; + } +private: + AioObjectRead *m_req; +}; + +template +class C_CacheRead : public Context { +public: + explicit C_CacheRead(ImageCtxT &ictx, AioObjectRead *req) + : m_image_ctx(ictx), m_req(req), m_enqueued(false) {} + + virtual void complete(int r) { + if (!m_enqueued) { + // cache_lock creates a lock ordering issue -- so re-execute this context + // outside the cache_lock + m_enqueued = true; + m_image_ctx.op_work_queue->queue(this, r); + return; + } + Context::complete(r); + } + +protected: + virtual void finish(int r) { + m_req->complete(r); + } + +private: + ImageCtxT &m_image_ctx; + AioObjectRead *m_req; + bool m_enqueued; +}; + } // anonymous namespace template @@ -79,7 +153,7 @@ void AioImageRequest::aio_read( I *ictx, AioCompletion *c, const std::vector > &extents, char *buf, bufferlist *pbl, int op_flags) { - AioImageRead req(*ictx, c, extents, buf, pbl, op_flags); + AioImageRead req(*ictx, c, extents, buf, pbl, op_flags); req.send(); } @@ -87,7 +161,7 @@ template void AioImageRequest::aio_read(I *ictx, AioCompletion *c, uint64_t off, size_t len, char *buf, bufferlist *pbl, int op_flags) { - AioImageRead req(*ictx, c, off, len, buf, pbl, op_flags); + AioImageRead req(*ictx, c, off, len, buf, pbl, op_flags); req.send(); } @@ -95,92 +169,97 @@ template void AioImageRequest::aio_write(I *ictx, AioCompletion *c, uint64_t off, size_t len, const char *buf, int op_flags) { - AioImageWrite req(*ictx, c, off, len, buf, op_flags); + AioImageWrite req(*ictx, c, off, len, buf, op_flags); req.send(); } template void AioImageRequest::aio_discard(I *ictx, AioCompletion *c, uint64_t off, uint64_t len) { - AioImageDiscard req(*ictx, c, off, len); + AioImageDiscard req(*ictx, c, off, len); req.send(); } template void AioImageRequest::aio_flush(I *ictx, AioCompletion *c) { - assert(c->is_initialized(AIO_TYPE_FLUSH)); - AioImageFlush req(*ictx, c); + AioImageFlush req(*ictx, c); req.send(); } template void AioImageRequest::send() { - assert(m_image_ctx.owner_lock.is_locked()); + I &image_ctx = this->m_image_ctx; + assert(image_ctx.owner_lock.is_locked()); assert(m_aio_comp->is_initialized(get_aio_type())); assert(m_aio_comp->is_started() ^ (get_aio_type() == AIO_TYPE_FLUSH)); - CephContext *cct = m_image_ctx.cct; - ldout(cct, 20) << get_request_type() << ": ictx=" << &m_image_ctx << ", " - << "completion=" << m_aio_comp << dendl; + CephContext *cct = image_ctx.cct; + AioCompletion *aio_comp = this->m_aio_comp; + ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << dendl; - m_aio_comp->get(); + aio_comp->get(); send_request(); } template void AioImageRequest::fail(int r) { - m_aio_comp->get(); - m_aio_comp->fail(r); + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->get(); + aio_comp->fail(r); } -void AioImageRead::send_request() { - CephContext *cct = m_image_ctx.cct; +template +void AioImageRead::send_request() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; - if (m_image_ctx.object_cacher && m_image_ctx.readahead_max_bytes > 0 && + if (image_ctx.object_cacher && image_ctx.readahead_max_bytes > 0 && !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) { - readahead(&m_image_ctx, m_image_extents); + readahead(get_image_ctx(&image_ctx), m_image_extents); } + AioCompletion *aio_comp = this->m_aio_comp; librados::snap_t snap_id; map > object_extents; uint64_t buffer_ofs = 0; { // prevent image size from changing between computing clip and recording // pending async operation - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - snap_id = m_image_ctx.snap_id; + RWLock::RLocker snap_locker(image_ctx.snap_lock); + snap_id = image_ctx.snap_id; // map for (vector >::const_iterator p = m_image_extents.begin(); p != m_image_extents.end(); ++p) { uint64_t len = p->second; - int r = clip_io(&m_image_ctx, p->first, &len); + int r = clip_io(get_image_ctx(&image_ctx), p->first, &len); if (r < 0) { - m_aio_comp->fail(r); + aio_comp->fail(r); return; } if (len == 0) { continue; } - Striper::file_to_extents(cct, m_image_ctx.format_string, - &m_image_ctx.layout, p->first, len, 0, + Striper::file_to_extents(cct, image_ctx.format_string, + &image_ctx.layout, p->first, len, 0, object_extents, buffer_ofs); buffer_ofs += len; } } - m_aio_comp->read_buf = m_buf; - m_aio_comp->read_buf_len = buffer_ofs; - m_aio_comp->read_bl = m_pbl; + aio_comp->read_buf = m_buf; + aio_comp->read_buf_len = buffer_ofs; + aio_comp->read_bl = m_pbl; // pre-calculate the expected number of read requests uint32_t request_count = 0; for (auto &object_extent : object_extents) { request_count += object_extent.second.size(); } - m_aio_comp->set_request_count(request_count); + aio_comp->set_request_count(request_count); // issue the requests for (auto &object_extent : object_extents) { @@ -189,74 +268,76 @@ void AioImageRead::send_request() { << extent.length << " from " << extent.buffer_extents << dendl; - C_AioRead *req_comp = new C_AioRead(m_aio_comp); - AioObjectRead *req = new AioObjectRead(&m_image_ctx, extent.oid.name, - extent.objectno, extent.offset, - extent.length, - extent.buffer_extents, snap_id, - true, req_comp, m_op_flags); + C_AioRead *req_comp = new C_AioRead(aio_comp); + AioObjectRead *req = AioObjectRead::create( + &image_ctx, extent.oid.name, extent.objectno, extent.offset, + extent.length, extent.buffer_extents, snap_id, true, req_comp, + m_op_flags); req_comp->set_req(req); - if (m_image_ctx.object_cacher) { - C_CacheRead *cache_comp = new C_CacheRead(&m_image_ctx, req); - m_image_ctx.aio_read_from_cache(extent.oid, extent.objectno, - &req->data(), extent.length, - extent.offset, cache_comp, m_op_flags); + if (image_ctx.object_cacher) { + C_CacheRead *cache_comp = new C_CacheRead(image_ctx, req); + image_ctx.aio_read_from_cache(extent.oid, extent.objectno, + &req->data(), extent.length, + extent.offset, cache_comp, m_op_flags); } else { req->send(); } } } - m_aio_comp->put(); + aio_comp->put(); - m_image_ctx.perfcounter->inc(l_librbd_rd); - m_image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs); + image_ctx.perfcounter->inc(l_librbd_rd); + image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs); } -void AbstractAioImageWrite::send_request() { - CephContext *cct = m_image_ctx.cct; +template +void AbstractAioImageWrite::send_request() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; - RWLock::RLocker md_locker(m_image_ctx.md_lock); + RWLock::RLocker md_locker(image_ctx.md_lock); bool journaling = false; + AioCompletion *aio_comp = this->m_aio_comp; uint64_t clip_len = m_len; ObjectExtents object_extents; ::SnapContext snapc; { // prevent image size from changing between computing clip and recording // pending async operation - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - if (m_image_ctx.snap_id != CEPH_NOSNAP || m_image_ctx.read_only) { - m_aio_comp->fail(-EROFS); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.snap_id != CEPH_NOSNAP || image_ctx.read_only) { + aio_comp->fail(-EROFS); return; } - int r = clip_io(&m_image_ctx, m_off, &clip_len); + int r = clip_io(get_image_ctx(&image_ctx), m_off, &clip_len); if (r < 0) { - m_aio_comp->fail(r); + aio_comp->fail(r); return; } - snapc = m_image_ctx.snapc; + snapc = image_ctx.snapc; // map to object extents if (clip_len > 0) { - Striper::file_to_extents(cct, m_image_ctx.format_string, - &m_image_ctx.layout, m_off, clip_len, 0, + Striper::file_to_extents(cct, image_ctx.format_string, + &image_ctx.layout, m_off, clip_len, 0, object_extents); } - journaling = (m_image_ctx.journal != NULL && - !m_image_ctx.journal->is_journal_replaying()); + journaling = (image_ctx.journal != nullptr && + image_ctx.journal->is_journal_appending()); } prune_object_extents(object_extents); if (!object_extents.empty()) { uint64_t journal_tid = 0; - m_aio_comp->set_request_count( + aio_comp->set_request_count( object_extents.size() + get_cache_request_count(journaling)); AioObjectRequests requests; @@ -265,33 +346,37 @@ void AbstractAioImageWrite::send_request() { if (journaling) { // in-flight ops are flushed prior to closing the journal - assert(m_image_ctx.journal != NULL); + assert(image_ctx.journal != NULL); journal_tid = append_journal_event(requests, m_synchronous); } - if (m_image_ctx.object_cacher != NULL) { + if (image_ctx.object_cacher != NULL) { send_cache_requests(object_extents, journal_tid); } } else { // no IO to perform -- fire completion - m_aio_comp->unblock(); + aio_comp->unblock(); } update_stats(clip_len); - m_aio_comp->put(); + aio_comp->put(); } -void AbstractAioImageWrite::send_object_requests( +template +void AbstractAioImageWrite::send_object_requests( const ObjectExtents &object_extents, const ::SnapContext &snapc, AioObjectRequests *aio_object_requests) { - CephContext *cct = m_image_ctx.cct; + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + AioCompletion *aio_comp = this->m_aio_comp; for (ObjectExtents::const_iterator p = object_extents.begin(); p != object_extents.end(); ++p) { ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length << " from " << p->buffer_extents << dendl; - C_AioRequest *req_comp = new C_AioRequest(m_aio_comp); - AioObjectRequest *request = create_object_request(*p, snapc, req_comp); + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + AioObjectRequestHandle *request = create_object_request(*p, snapc, + req_comp); // if journaling, stash the request for later; otherwise send if (request != NULL) { @@ -304,92 +389,111 @@ void AbstractAioImageWrite::send_object_requests( } } -void AioImageWrite::assemble_extent(const ObjectExtent &object_extent, +template +void AioImageWrite::assemble_extent(const ObjectExtent &object_extent, bufferlist *bl) { - for (Extents::const_iterator q = object_extent.buffer_extents.begin(); + for (auto q = object_extent.buffer_extents.begin(); q != object_extent.buffer_extents.end(); ++q) { bl->append(m_buf + q->first, q->second);; } } -uint64_t AioImageWrite::append_journal_event( +template +uint64_t AioImageWrite::append_journal_event( const AioObjectRequests &requests, bool synchronous) { bufferlist bl; - bl.append(m_buf, m_len); - - uint64_t tid = m_image_ctx.journal->append_write_event(m_off, m_len, bl, - requests, synchronous); - if (m_image_ctx.object_cacher == NULL) { - m_aio_comp->associate_journal_event(tid); + bl.append(m_buf, this->m_len); + + I &image_ctx = this->m_image_ctx; + uint64_t tid = image_ctx.journal->append_write_event(this->m_off, this->m_len, + bl, requests, + synchronous); + if (image_ctx.object_cacher == NULL) { + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->associate_journal_event(tid); } return tid; } -void AioImageWrite::send_cache_requests(const ObjectExtents &object_extents, +template +void AioImageWrite::send_cache_requests(const ObjectExtents &object_extents, uint64_t journal_tid) { - for (ObjectExtents::const_iterator p = object_extents.begin(); - p != object_extents.end(); ++p) { + I &image_ctx = this->m_image_ctx; + for (auto p = object_extents.begin(); p != object_extents.end(); ++p) { const ObjectExtent &object_extent = *p; bufferlist bl; assemble_extent(object_extent, &bl); - C_AioRequest *req_comp = new C_AioRequest(m_aio_comp); - m_image_ctx.write_to_cache(object_extent.oid, bl, object_extent.length, - object_extent.offset, req_comp, m_op_flags, + AioCompletion *aio_comp = this->m_aio_comp; + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.write_to_cache(object_extent.oid, bl, object_extent.length, + object_extent.offset, req_comp, m_op_flags, journal_tid); } } -void AioImageWrite::send_object_requests( +template +void AioImageWrite::send_object_requests( const ObjectExtents &object_extents, const ::SnapContext &snapc, AioObjectRequests *aio_object_requests) { + I &image_ctx = this->m_image_ctx; + // cache handles creating object requests during writeback - if (m_image_ctx.object_cacher == NULL) { - AbstractAioImageWrite::send_object_requests(object_extents, snapc, + if (image_ctx.object_cacher == NULL) { + AbstractAioImageWrite::send_object_requests(object_extents, snapc, aio_object_requests); } } -AioObjectRequest *AioImageWrite::create_object_request( +template +AioObjectRequestHandle *AioImageWrite::create_object_request( const ObjectExtent &object_extent, const ::SnapContext &snapc, Context *on_finish) { - assert(m_image_ctx.object_cacher == NULL); + I &image_ctx = this->m_image_ctx; + assert(image_ctx.object_cacher == NULL); bufferlist bl; assemble_extent(object_extent, &bl); - AioObjectWrite *req = new AioObjectWrite(&m_image_ctx, - object_extent.oid.name, - object_extent.objectno, - object_extent.offset, bl, - snapc, on_finish); - req->set_op_flags(m_op_flags); + AioObjectRequest *req = AioObjectRequest::create_write( + &image_ctx, object_extent.oid.name, object_extent.objectno, + object_extent.offset, bl, snapc, on_finish, m_op_flags); return req; } -void AioImageWrite::update_stats(size_t length) { - m_image_ctx.perfcounter->inc(l_librbd_wr); - m_image_ctx.perfcounter->inc(l_librbd_wr_bytes, length); +template +void AioImageWrite::update_stats(size_t length) { + I &image_ctx = this->m_image_ctx; + image_ctx.perfcounter->inc(l_librbd_wr); + image_ctx.perfcounter->inc(l_librbd_wr_bytes, length); } -uint64_t AioImageDiscard::append_journal_event( +template +uint64_t AioImageDiscard::append_journal_event( const AioObjectRequests &requests, bool synchronous) { - journal::EventEntry event_entry(journal::AioDiscardEvent(m_off, m_len)); - uint64_t tid = m_image_ctx.journal->append_io_event(std::move(event_entry), - requests, m_off, m_len, - synchronous); - m_aio_comp->associate_journal_event(tid); + I &image_ctx = this->m_image_ctx; + + journal::EventEntry event_entry(journal::AioDiscardEvent(this->m_off, + this->m_len)); + uint64_t tid = image_ctx.journal->append_io_event(std::move(event_entry), + requests, this->m_off, + this->m_len, synchronous); + + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->associate_journal_event(tid); return tid; } -void AioImageDiscard::prune_object_extents(ObjectExtents &object_extents) { - CephContext *cct = m_image_ctx.cct; +template +void AioImageDiscard::prune_object_extents(ObjectExtents &object_extents) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; if (!cct->_conf->rbd_skip_partial_discard) { return; } for (auto p = object_extents.begin(); p != object_extents.end(); ) { - if (p->offset + p->length < m_image_ctx.layout.object_size) { + if (p->offset + p->length < image_ctx.layout.object_size) { ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length << " from " << p->buffer_extents << ": skip partial discard" << dendl; @@ -400,88 +504,106 @@ void AioImageDiscard::prune_object_extents(ObjectExtents &object_extents) { } } -uint32_t AioImageDiscard::get_cache_request_count(bool journaling) const { +template +uint32_t AioImageDiscard::get_cache_request_count(bool journaling) const { // extra completion request is required for tracking journal commit - return (m_image_ctx.object_cacher != nullptr && journaling ? 1 : 0); + I &image_ctx = this->m_image_ctx; + return (image_ctx.object_cacher != nullptr && journaling ? 1 : 0); } -void AioImageDiscard::send_cache_requests(const ObjectExtents &object_extents, +template +void AioImageDiscard::send_cache_requests(const ObjectExtents &object_extents, uint64_t journal_tid) { + I &image_ctx = this->m_image_ctx; if (journal_tid == 0) { - Mutex::Locker cache_locker(m_image_ctx.cache_lock); - m_image_ctx.object_cacher->discard_set(m_image_ctx.object_set, - object_extents); + Mutex::Locker cache_locker(image_ctx.cache_lock); + image_ctx.object_cacher->discard_set(image_ctx.object_set, + object_extents); } else { // cannot discard from cache until journal has committed - assert(m_image_ctx.journal != NULL); - m_image_ctx.journal->wait_event( - journal_tid, new C_DiscardJournalCommit(m_image_ctx, m_aio_comp, - object_extents, journal_tid)); + assert(image_ctx.journal != NULL); + AioCompletion *aio_comp = this->m_aio_comp; + image_ctx.journal->wait_event( + journal_tid, new C_DiscardJournalCommit(image_ctx, aio_comp, + object_extents, journal_tid)); } } -AioObjectRequest *AioImageDiscard::create_object_request( +template +AioObjectRequestHandle *AioImageDiscard::create_object_request( const ObjectExtent &object_extent, const ::SnapContext &snapc, Context *on_finish) { - AioObjectRequest *req; - if (object_extent.length == m_image_ctx.layout.object_size) { - req = new AioObjectRemove(&m_image_ctx, object_extent.oid.name, - object_extent.objectno, snapc, on_finish); + I &image_ctx = this->m_image_ctx; + + AioObjectRequest *req; + if (object_extent.length == image_ctx.layout.object_size) { + req = AioObjectRequest::create_remove( + &image_ctx, object_extent.oid.name, object_extent.objectno, snapc, + on_finish); } else if (object_extent.offset + object_extent.length == - m_image_ctx.layout.object_size) { - req = new AioObjectTruncate(&m_image_ctx, object_extent.oid.name, - object_extent.objectno, object_extent.offset, - snapc, on_finish); + image_ctx.layout.object_size) { + req = AioObjectRequest::create_truncate( + &image_ctx, object_extent.oid.name, object_extent.objectno, + object_extent.offset, snapc, on_finish); } else { - req = new AioObjectZero(&m_image_ctx, object_extent.oid.name, - object_extent.objectno, object_extent.offset, - object_extent.length, snapc, on_finish); + req = AioObjectRequest::create_zero( + &image_ctx, object_extent.oid.name, object_extent.objectno, + object_extent.offset, object_extent.length, snapc, on_finish); } return req; } -void AioImageDiscard::update_stats(size_t length) { - m_image_ctx.perfcounter->inc(l_librbd_discard); - m_image_ctx.perfcounter->inc(l_librbd_discard_bytes, length); +template +void AioImageDiscard::update_stats(size_t length) { + I &image_ctx = this->m_image_ctx; + image_ctx.perfcounter->inc(l_librbd_discard); + image_ctx.perfcounter->inc(l_librbd_discard_bytes, length); } -void AioImageFlush::send_request() { +template +void AioImageFlush::send_request() { + I &image_ctx = this->m_image_ctx; bool journaling = false; { - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - journaling = (m_image_ctx.journal != NULL && - !m_image_ctx.journal->is_journal_replaying()); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + journaling = (image_ctx.journal != nullptr && + image_ctx.journal->is_journal_appending()); } + AioCompletion *aio_comp = this->m_aio_comp; if (journaling) { // in-flight ops are flushed prior to closing the journal - uint64_t journal_tid = m_image_ctx.journal->append_io_event( + uint64_t journal_tid = image_ctx.journal->append_io_event( journal::EventEntry(journal::AioFlushEvent()), AioObjectRequests(), 0, 0, false); - m_aio_comp->set_request_count(2); + aio_comp->set_request_count(2); - C_FlushJournalCommit *ctx = new C_FlushJournalCommit(m_image_ctx, - m_aio_comp, - journal_tid); - C_AioRequest *req_comp = new C_AioRequest(m_aio_comp); - m_image_ctx.journal->flush_event(journal_tid, ctx); - m_aio_comp->associate_journal_event(journal_tid); - m_image_ctx.flush_async_operations(req_comp); + C_FlushJournalCommit *ctx = new C_FlushJournalCommit(image_ctx, + aio_comp, + journal_tid); + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.journal->flush_event(journal_tid, ctx); + aio_comp->associate_journal_event(journal_tid); + image_ctx.flush_async_operations(req_comp); } else { // flush rbd cache only when journaling is not enabled - m_aio_comp->set_request_count(1); - C_AioRequest *req_comp = new C_AioRequest(m_aio_comp); - m_image_ctx.flush(req_comp); + aio_comp->set_request_count(1); + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.flush(req_comp); } // track flush op for block writes - m_aio_comp->start_op(true); - m_aio_comp->put(); + aio_comp->start_op(true); + aio_comp->put(); - m_image_ctx.perfcounter->inc(l_librbd_aio_flush); + image_ctx.perfcounter->inc(l_librbd_aio_flush); } } // namespace librbd template class librbd::AioImageRequest; +template class librbd::AbstractAioImageWrite; +template class librbd::AioImageWrite; +template class librbd::AioImageDiscard; +template class librbd::AioImageFlush; diff --git a/src/librbd/AioImageRequest.h b/src/librbd/AioImageRequest.h index b36c09b27132d..3d6b3851b35b0 100644 --- a/src/librbd/AioImageRequest.h +++ b/src/librbd/AioImageRequest.h @@ -15,9 +15,9 @@ namespace librbd { -class AioObjectRequest; -class ImageCtx; class AioCompletion; +class AioObjectRequestHandle; +class ImageCtx; template class AioImageRequest { @@ -49,7 +49,7 @@ class AioImageRequest { void fail(int r); protected: - typedef std::list AioObjectRequests; + typedef std::list AioObjectRequests; ImageCtxT &m_image_ctx; AioCompletion *m_aio_comp; @@ -62,20 +62,24 @@ class AioImageRequest { virtual const char *get_request_type() const = 0; }; -class AioImageRead : public AioImageRequest<> { +template +class AioImageRead : public AioImageRequest { public: - AioImageRead(ImageCtx &image_ctx, AioCompletion *aio_comp, uint64_t off, + using typename AioImageRequest::Extents; + + AioImageRead(ImageCtxT &image_ctx, AioCompletion *aio_comp, uint64_t off, size_t len, char *buf, bufferlist *pbl, int op_flags) - : AioImageRequest(image_ctx, aio_comp), m_buf(buf), m_pbl(pbl), + : AioImageRequest(image_ctx, aio_comp), m_buf(buf), m_pbl(pbl), m_op_flags(op_flags) { m_image_extents.push_back(std::make_pair(off, len)); } - AioImageRead(ImageCtx &image_ctx, AioCompletion *aio_comp, + AioImageRead(ImageCtxT &image_ctx, AioCompletion *aio_comp, const Extents &image_extents, char *buf, bufferlist *pbl, int op_flags) - : AioImageRequest(image_ctx, aio_comp), m_image_extents(image_extents), - m_buf(buf), m_pbl(pbl), m_op_flags(op_flags) { + : AioImageRequest(image_ctx, aio_comp), + m_image_extents(image_extents), m_buf(buf), m_pbl(pbl), + m_op_flags(op_flags) { } protected: @@ -93,7 +97,8 @@ class AioImageRead : public AioImageRequest<> { int m_op_flags; }; -class AbstractAioImageWrite : public AioImageRequest<> { +template +class AbstractAioImageWrite : public AioImageRequest { public: virtual bool is_write_op() const { return true; @@ -104,14 +109,16 @@ class AbstractAioImageWrite : public AioImageRequest<> { } protected: + using typename AioImageRequest::AioObjectRequests; + typedef std::vector ObjectExtents; const uint64_t m_off; const size_t m_len; - AbstractAioImageWrite(ImageCtx &image_ctx, AioCompletion *aio_comp, + AbstractAioImageWrite(ImageCtxT &image_ctx, AioCompletion *aio_comp, uint64_t off, size_t len) - : AioImageRequest(image_ctx, aio_comp), m_off(off), m_len(len), + : AioImageRequest(image_ctx, aio_comp), m_off(off), m_len(len), m_synchronous(false) { } @@ -128,7 +135,7 @@ class AbstractAioImageWrite : public AioImageRequest<> { virtual void send_object_requests(const ObjectExtents &object_extents, const ::SnapContext &snapc, AioObjectRequests *aio_object_requests); - virtual AioObjectRequest *create_object_request( + virtual AioObjectRequestHandle *create_object_request( const ObjectExtent &object_extent, const ::SnapContext &snapc, Context *on_finish) = 0; @@ -140,15 +147,19 @@ class AbstractAioImageWrite : public AioImageRequest<> { bool m_synchronous; }; -class AioImageWrite : public AbstractAioImageWrite { +template +class AioImageWrite : public AbstractAioImageWrite { public: - AioImageWrite(ImageCtx &image_ctx, AioCompletion *aio_comp, uint64_t off, + AioImageWrite(ImageCtxT &image_ctx, AioCompletion *aio_comp, uint64_t off, size_t len, const char *buf, int op_flags) - : AbstractAioImageWrite(image_ctx, aio_comp, off, len), m_buf(buf), - m_op_flags(op_flags) { + : AbstractAioImageWrite(image_ctx, aio_comp, off, len), + m_buf(buf), m_op_flags(op_flags) { } protected: + using typename AioImageRequest::AioObjectRequests; + using typename AbstractAioImageWrite::ObjectExtents; + virtual aio_type_t get_aio_type() const { return AIO_TYPE_WRITE; } @@ -164,7 +175,7 @@ class AioImageWrite : public AbstractAioImageWrite { virtual void send_object_requests(const ObjectExtents &object_extents, const ::SnapContext &snapc, AioObjectRequests *aio_object_requests); - virtual AioObjectRequest *create_object_request( + virtual AioObjectRequestHandle *create_object_request( const ObjectExtent &object_extent, const ::SnapContext &snapc, Context *on_finish); @@ -176,14 +187,18 @@ class AioImageWrite : public AbstractAioImageWrite { int m_op_flags; }; -class AioImageDiscard : public AbstractAioImageWrite { +template +class AioImageDiscard : public AbstractAioImageWrite { public: - AioImageDiscard(ImageCtx &image_ctx, AioCompletion *aio_comp, uint64_t off, + AioImageDiscard(ImageCtxT &image_ctx, AioCompletion *aio_comp, uint64_t off, uint64_t len) - : AbstractAioImageWrite(image_ctx, aio_comp, off, len) { + : AbstractAioImageWrite(image_ctx, aio_comp, off, len) { } protected: + using typename AioImageRequest::AioObjectRequests; + using typename AbstractAioImageWrite::ObjectExtents; + virtual aio_type_t get_aio_type() const { return AIO_TYPE_DISCARD; } @@ -196,7 +211,7 @@ class AioImageDiscard : public AbstractAioImageWrite { virtual void send_cache_requests(const ObjectExtents &object_extents, uint64_t journal_tid); - virtual AioObjectRequest *create_object_request( + virtual AioObjectRequestHandle *create_object_request( const ObjectExtent &object_extent, const ::SnapContext &snapc, Context *on_finish); @@ -205,10 +220,11 @@ class AioImageDiscard : public AbstractAioImageWrite { virtual void update_stats(size_t length); }; -class AioImageFlush : public AioImageRequest<> { +template +class AioImageFlush : public AioImageRequest { public: - AioImageFlush(ImageCtx &image_ctx, AioCompletion *aio_comp) - : AioImageRequest(image_ctx, aio_comp) { + AioImageFlush(ImageCtxT &image_ctx, AioCompletion *aio_comp) + : AioImageRequest(image_ctx, aio_comp) { } virtual bool is_write_op() const { @@ -216,6 +232,8 @@ class AioImageFlush : public AioImageRequest<> { } protected: + using typename AioImageRequest::AioObjectRequests; + virtual void send_request(); virtual aio_type_t get_aio_type() const { return AIO_TYPE_FLUSH; @@ -228,5 +246,9 @@ class AioImageFlush : public AioImageRequest<> { } // namespace librbd extern template class librbd::AioImageRequest; +extern template class librbd::AbstractAioImageWrite; +extern template class librbd::AioImageWrite; +extern template class librbd::AioImageDiscard; +extern template class librbd::AioImageFlush; #endif // CEPH_LIBRBD_AIO_IMAGE_REQUEST_H diff --git a/src/librbd/AioImageRequestWQ.cc b/src/librbd/AioImageRequestWQ.cc index b145edf94bca5..f8c08f0c90828 100644 --- a/src/librbd/AioImageRequestWQ.cc +++ b/src/librbd/AioImageRequestWQ.cc @@ -49,7 +49,7 @@ ssize_t AioImageRequestWQ::write(uint64_t off, uint64_t len, const char *buf, << "len = " << len << dendl; m_image_ctx.snap_lock.get_read(); - int r = clip_io(&m_image_ctx, off, &len); + int r = clip_io(util::get_image_ctx(&m_image_ctx), off, &len); m_image_ctx.snap_lock.put_read(); if (r < 0) { lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl; @@ -73,7 +73,7 @@ int AioImageRequestWQ::discard(uint64_t off, uint64_t len) { << "len = " << len << dendl; m_image_ctx.snap_lock.get_read(); - int r = clip_io(&m_image_ctx, off, &len); + int r = clip_io(util::get_image_ctx(&m_image_ctx), off, &len); m_image_ctx.snap_lock.put_read(); if (r < 0) { lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl; @@ -120,7 +120,7 @@ void AioImageRequestWQ::aio_read(AioCompletion *c, uint64_t off, uint64_t len, if (m_image_ctx.non_blocking_aio || writes_blocked() || !writes_empty() || lock_required) { - queue(new AioImageRead(m_image_ctx, c, off, len, buf, pbl, op_flags)); + queue(new AioImageRead<>(m_image_ctx, c, off, len, buf, pbl, op_flags)); } else { c->start_op(); AioImageRequest<>::aio_read(&m_image_ctx, c, off, len, buf, pbl, op_flags); @@ -147,7 +147,7 @@ void AioImageRequestWQ::aio_write(AioCompletion *c, uint64_t off, uint64_t len, RWLock::RLocker owner_locker(m_image_ctx.owner_lock); if (m_image_ctx.non_blocking_aio || writes_blocked()) { - queue(new AioImageWrite(m_image_ctx, c, off, len, buf, op_flags)); + queue(new AioImageWrite<>(m_image_ctx, c, off, len, buf, op_flags)); } else { c->start_op(); AioImageRequest<>::aio_write(&m_image_ctx, c, off, len, buf, op_flags); @@ -173,7 +173,7 @@ void AioImageRequestWQ::aio_discard(AioCompletion *c, uint64_t off, RWLock::RLocker owner_locker(m_image_ctx.owner_lock); if (m_image_ctx.non_blocking_aio || writes_blocked()) { - queue(new AioImageDiscard(m_image_ctx, c, off, len)); + queue(new AioImageDiscard<>(m_image_ctx, c, off, len)); } else { c->start_op(); AioImageRequest<>::aio_discard(&m_image_ctx, c, off, len); @@ -197,7 +197,7 @@ void AioImageRequestWQ::aio_flush(AioCompletion *c, bool native_async) { RWLock::RLocker owner_locker(m_image_ctx.owner_lock); if (m_image_ctx.non_blocking_aio || writes_blocked() || !writes_empty()) { - queue(new AioImageFlush(m_image_ctx, c)); + queue(new AioImageFlush<>(m_image_ctx, c)); } else { AioImageRequest<>::aio_flush(&m_image_ctx, c); finish_in_flight_op(); diff --git a/src/librbd/AioObjectRequest.cc b/src/librbd/AioObjectRequest.cc index 934375ec04029..cf7617640b2c2 100644 --- a/src/librbd/AioObjectRequest.cc +++ b/src/librbd/AioObjectRequest.cc @@ -27,560 +27,623 @@ namespace librbd { - AioObjectRequest::AioObjectRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t off, - uint64_t len, librados::snap_t snap_id, - Context *completion, bool hide_enoent) - : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), - m_object_len(len), m_snap_id(snap_id), m_completion(completion), - m_hide_enoent(hide_enoent) { +template +AioObjectRequest* +AioObjectRequest::create_remove(I *ictx, const std::string &oid, + uint64_t object_no, + const ::SnapContext &snapc, + Context *completion) { + return new AioObjectRemove(util::get_image_ctx(ictx), oid, object_no, snapc, + completion); +} - Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, - 0, m_ictx->layout.object_size, m_parent_extents); +template +AioObjectRequest* +AioObjectRequest::create_truncate(I *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + const ::SnapContext &snapc, + Context *completion) { + return new AioObjectTruncate(util::get_image_ctx(ictx), oid, object_no, + object_off, snapc, completion); +} - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - compute_parent_extents(); - } +template +AioObjectRequest* +AioObjectRequest::create_write(I *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + const ceph::bufferlist &data, + const ::SnapContext &snapc, + Context *completion, int op_flags) { + return new AioObjectWrite(util::get_image_ctx(ictx), oid, object_no, + object_off, data, snapc, completion, op_flags); +} - void AioObjectRequest::complete(int r) - { - if (should_complete(r)) { - ldout(m_ictx->cct, 20) << "complete " << this << dendl; - if (m_hide_enoent && r == -ENOENT) { - r = 0; - } - m_completion->complete(r); - delete this; - } - } +template +AioObjectRequest* +AioObjectRequest::create_zero(I *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + uint64_t object_len, + const ::SnapContext &snapc, + Context *completion) { + return new AioObjectZero(util::get_image_ctx(ictx), oid, object_no, + object_off, object_len, snapc, completion); +} - bool AioObjectRequest::compute_parent_extents() { - assert(m_ictx->snap_lock.is_locked()); - assert(m_ictx->parent_lock.is_locked()); +template +AioObjectRequest::AioObjectRequest(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t off, + uint64_t len, librados::snap_t snap_id, + Context *completion, bool hide_enoent) + : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), + m_object_len(len), m_snap_id(snap_id), m_completion(completion), + m_hide_enoent(hide_enoent) { + + Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, + 0, m_ictx->layout.object_size, m_parent_extents); + + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + compute_parent_extents(); +} - uint64_t parent_overlap; - int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); - if (r < 0) { - // NOTE: it's possible for a snapshot to be deleted while we are - // still reading from it - lderr(m_ictx->cct) << this << " compute_parent_extents: failed to " - << "retrieve parent overlap: " << cpp_strerror(r) - << dendl; - m_parent_extents.clear(); - return false; +template +void AioObjectRequest::complete(int r) +{ + if (should_complete(r)) { + ldout(m_ictx->cct, 20) << "complete " << this << dendl; + if (m_hide_enoent && r == -ENOENT) { + r = 0; } + m_completion->complete(r); + delete this; + } +} - uint64_t object_overlap = - m_ictx->prune_parent_extents(m_parent_extents, parent_overlap); - if (object_overlap > 0) { - ldout(m_ictx->cct, 20) << this << " compute_parent_extents: " - << "overlap " << parent_overlap << " " - << "extents " << m_parent_extents << dendl; - return true; - } +template +bool AioObjectRequest::compute_parent_extents() { + assert(m_ictx->snap_lock.is_locked()); + assert(m_ictx->parent_lock.is_locked()); + + uint64_t parent_overlap; + int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); + if (r < 0) { + // NOTE: it's possible for a snapshot to be deleted while we are + // still reading from it + lderr(m_ictx->cct) << this << " compute_parent_extents: failed to " + << "retrieve parent overlap: " << cpp_strerror(r) + << dendl; + m_parent_extents.clear(); return false; } - static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { - assert(ictx->owner_lock.is_locked()); - assert(ictx->snap_lock.is_locked()); - return (ictx->clone_copy_on_read && - !ictx->read_only && snap_id == CEPH_NOSNAP && - (ictx->exclusive_lock == nullptr || - ictx->exclusive_lock->is_lock_owner())); + uint64_t object_overlap = + m_ictx->prune_parent_extents(m_parent_extents, parent_overlap); + if (object_overlap > 0) { + ldout(m_ictx->cct, 20) << this << " compute_parent_extents: " + << "overlap " << parent_overlap << " " + << "extents " << m_parent_extents << dendl; + return true; } + return false; +} - /** read **/ - - AioObjectRead::AioObjectRead(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t offset, uint64_t len, - vector >& be, - librados::snap_t snap_id, bool sparse, - Context *completion, int op_flags) - : AioObjectRequest(ictx, oid, objectno, offset, len, snap_id, completion, - false), - m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse), - m_op_flags(op_flags), m_parent_completion(NULL), - m_state(LIBRBD_AIO_READ_FLAT) { +static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { + assert(ictx->owner_lock.is_locked()); + assert(ictx->snap_lock.is_locked()); + return (ictx->clone_copy_on_read && + !ictx->read_only && snap_id == CEPH_NOSNAP && + (ictx->exclusive_lock == nullptr || + ictx->exclusive_lock->is_lock_owner())); +} - guard_read(); - } +/** read **/ + +template +AioObjectRead::AioObjectRead(I *ictx, const std::string &oid, + uint64_t objectno, uint64_t offset, + uint64_t len, + vector >& be, + librados::snap_t snap_id, bool sparse, + Context *completion, int op_flags) + : AioObjectRequest(util::get_image_ctx(ictx), oid, objectno, offset, len, + snap_id, completion, false), + m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse), + m_op_flags(op_flags), m_parent_completion(NULL), + m_state(LIBRBD_AIO_READ_FLAT) { + + guard_read(); +} - void AioObjectRead::guard_read() - { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); +template +void AioObjectRead::guard_read() +{ + ImageCtx *image_ctx = this->m_ictx; + RWLock::RLocker snap_locker(image_ctx->snap_lock); + RWLock::RLocker parent_locker(image_ctx->parent_lock); - if (has_parent()) { - ldout(m_ictx->cct, 20) << __func__ << " guarding read" << dendl; - m_state = LIBRBD_AIO_READ_GUARD; - } + if (this->has_parent()) { + ldout(image_ctx->cct, 20) << __func__ << " guarding read" << dendl; + m_state = LIBRBD_AIO_READ_GUARD; } +} - bool AioObjectRead::should_complete(int r) - { - ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << " r = " << r << dendl; - - bool finished = true; - - switch (m_state) { - case LIBRBD_AIO_READ_GUARD: - ldout(m_ictx->cct, 20) << "should_complete " << this - << " READ_CHECK_GUARD" << dendl; - - // This is the step to read from parent - if (!m_tried_parent && r == -ENOENT) { - { - RWLock::RLocker owner_locker(m_ictx->owner_lock); - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - if (m_ictx->parent == NULL) { - ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl; - m_state = LIBRBD_AIO_READ_FLAT; - finished = false; - break; - } - - // calculate reverse mapping onto the image - vector > parent_extents; - Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, - m_object_off, m_object_len, parent_extents); - - uint64_t parent_overlap = 0; - uint64_t object_overlap = 0; - r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); - if (r == 0) { - object_overlap = m_ictx->prune_parent_extents(parent_extents, - parent_overlap); - } +template +bool AioObjectRead::should_complete(int r) +{ + ImageCtx *image_ctx = this->m_ictx; + ldout(image_ctx->cct, 20) << "should_complete " << this << " " + << this->m_oid << " " + << this->m_object_off << "~" << this->m_object_len + << " r = " << r << dendl; + + bool finished = true; + + switch (m_state) { + case LIBRBD_AIO_READ_GUARD: + ldout(image_ctx->cct, 20) << "should_complete " << this + << " READ_CHECK_GUARD" << dendl; + + // This is the step to read from parent + if (!m_tried_parent && r == -ENOENT) { + { + RWLock::RLocker owner_locker(image_ctx->owner_lock); + RWLock::RLocker snap_locker(image_ctx->snap_lock); + RWLock::RLocker parent_locker(image_ctx->parent_lock); + if (image_ctx->parent == NULL) { + ldout(image_ctx->cct, 20) << "parent is gone; do nothing" << dendl; + m_state = LIBRBD_AIO_READ_FLAT; + finished = false; + break; + } - if (object_overlap > 0) { - m_tried_parent = true; - if (is_copy_on_read(m_ictx, m_snap_id)) { - m_state = LIBRBD_AIO_READ_COPYUP; - } + // calculate reverse mapping onto the image + vector > parent_extents; + Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, + this->m_object_no, this->m_object_off, + this->m_object_len, parent_extents); + + uint64_t parent_overlap = 0; + uint64_t object_overlap = 0; + r = image_ctx->get_parent_overlap(this->m_snap_id, &parent_overlap); + if (r == 0) { + object_overlap = image_ctx->prune_parent_extents(parent_extents, + parent_overlap); + } - read_from_parent(parent_extents); - finished = false; + if (object_overlap > 0) { + m_tried_parent = true; + if (is_copy_on_read(image_ctx, this->m_snap_id)) { + m_state = LIBRBD_AIO_READ_COPYUP; } - } - if (m_tried_parent) { - // release reference to the parent read completion. this request - // might be completed after unblock is invoked. - AioCompletion *parent_completion = m_parent_completion; - parent_completion->unblock(); - parent_completion->put(); + read_from_parent(parent_extents); + finished = false; } } - break; - case LIBRBD_AIO_READ_COPYUP: - ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_COPYUP" - << dendl; - // This is the extra step for copy-on-read: kick off an asynchronous copyup. - // It is different from copy-on-write as asynchronous copyup will finish - // by itself so state won't go back to LIBRBD_AIO_READ_GUARD. - - assert(m_tried_parent); - if (r > 0) { - // If read entire object from parent success and CoR is possible, kick - // off a asynchronous copyup. This approach minimizes the latency - // impact. - send_copyup(); + + if (m_tried_parent) { + // release reference to the parent read completion. this request + // might be completed after unblock is invoked. + AioCompletion *parent_completion = m_parent_completion; + parent_completion->unblock(); + parent_completion->put(); } - break; - case LIBRBD_AIO_READ_FLAT: - ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_FLAT" - << dendl; - // The read content should be deposit in m_read_data - break; - default: - lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; - assert(0); } - - return finished; + break; + case LIBRBD_AIO_READ_COPYUP: + ldout(image_ctx->cct, 20) << "should_complete " << this << " READ_COPYUP" + << dendl; + // This is the extra step for copy-on-read: kick off an asynchronous copyup. + // It is different from copy-on-write as asynchronous copyup will finish + // by itself so state won't go back to LIBRBD_AIO_READ_GUARD. + + assert(m_tried_parent); + if (r > 0) { + // If read entire object from parent success and CoR is possible, kick + // off a asynchronous copyup. This approach minimizes the latency + // impact. + send_copyup(); + } + break; + case LIBRBD_AIO_READ_FLAT: + ldout(image_ctx->cct, 20) << "should_complete " << this << " READ_FLAT" + << dendl; + // The read content should be deposit in m_read_data + break; + default: + lderr(image_ctx->cct) << "invalid request state: " << m_state << dendl; + assert(0); } - void AioObjectRead::send() { - ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; + return finished; +} - { - RWLock::RLocker snap_locker(m_ictx->snap_lock); +template +void AioObjectRead::send() { + ImageCtx *image_ctx = this->m_ictx; + ldout(image_ctx->cct, 20) << "send " << this << " " << this->m_oid << " " + << this->m_object_off << "~" << this->m_object_len + << dendl; - // send read request to parent if the object doesn't exist locally - if (m_ictx->object_map != nullptr && - !m_ictx->object_map->object_may_exist(m_object_no)) { - m_ictx->op_work_queue->queue(util::create_context_callback< - AioObjectRequest>(this), -ENOENT); - return; - } + { + RWLock::RLocker snap_locker(image_ctx->snap_lock); + + // send read request to parent if the object doesn't exist locally + if (image_ctx->object_map != nullptr && + !image_ctx->object_map->object_may_exist(this->m_object_no)) { + image_ctx->op_work_queue->queue(util::create_context_callback< + AioObjectRequest >(this), -ENOENT); + return; } + } - librados::ObjectReadOperation op; - int flags = m_ictx->get_read_flags(m_snap_id); - if (m_sparse) { - op.sparse_read(m_object_off, m_object_len, &m_ext_map, &m_read_data, - NULL); - } else { - op.read(m_object_off, m_object_len, &m_read_data, NULL); - } - op.set_op_flags2(m_op_flags); + librados::ObjectReadOperation op; + int flags = image_ctx->get_read_flags(this->m_snap_id); + if (m_sparse) { + op.sparse_read(this->m_object_off, this->m_object_len, &m_ext_map, + &m_read_data, nullptr); + } else { + op.read(this->m_object_off, this->m_object_len, &m_read_data, nullptr); + } + op.set_op_flags2(m_op_flags); - librados::AioCompletion *rados_completion = - util::create_rados_ack_callback(this); - int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags, - NULL); - assert(r == 0); + librados::AioCompletion *rados_completion = + util::create_rados_ack_callback(this); + int r = image_ctx->data_ctx.aio_operate(this->m_oid, rados_completion, &op, + flags, nullptr); + assert(r == 0); - rados_completion->release(); - } + rados_completion->release(); +} - void AioObjectRead::send_copyup() +template +void AioObjectRead::send_copyup() +{ + ImageCtx *image_ctx = this->m_ictx; { - { - RWLock::RLocker owner_locker(m_ictx->owner_lock); - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - if (!compute_parent_extents() || - (m_ictx->exclusive_lock != nullptr && - !m_ictx->exclusive_lock->is_lock_owner())) { - return; - } - } - - Mutex::Locker copyup_locker(m_ictx->copyup_list_lock); - map::iterator it = - m_ictx->copyup_list.find(m_object_no); - if (it == m_ictx->copyup_list.end()) { - // create and kick off a CopyupRequest - CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, m_object_no, - m_parent_extents); - m_ictx->copyup_list[m_object_no] = new_req; - new_req->send(); + RWLock::RLocker owner_locker(image_ctx->owner_lock); + RWLock::RLocker snap_locker(image_ctx->snap_lock); + RWLock::RLocker parent_locker(image_ctx->parent_lock); + if (!this->compute_parent_extents() || + (image_ctx->exclusive_lock != nullptr && + !image_ctx->exclusive_lock->is_lock_owner())) { + return; } } - void AioObjectRead::read_from_parent(const vector >& parent_extents) - { - assert(!m_parent_completion); - m_parent_completion = AioCompletion::create_and_start( - this, m_ictx, AIO_TYPE_READ); - - // prevent the parent image from being deleted while this - // request is still in-progress - m_parent_completion->get(); - m_parent_completion->block(); - - ldout(m_ictx->cct, 20) << "read_from_parent this = " << this - << " parent completion " << m_parent_completion - << " extents " << parent_extents - << dendl; - RWLock::RLocker owner_locker(m_ictx->parent->owner_lock); - AioImageRequest<>::aio_read(m_ictx->parent, m_parent_completion, - parent_extents, NULL, &m_read_data, 0); + Mutex::Locker copyup_locker(image_ctx->copyup_list_lock); + map::iterator it = + image_ctx->copyup_list.find(this->m_object_no); + if (it == image_ctx->copyup_list.end()) { + // create and kick off a CopyupRequest + CopyupRequest *new_req = new CopyupRequest(image_ctx, this->m_oid, + this->m_object_no, + this->m_parent_extents); + image_ctx->copyup_list[this->m_object_no] = new_req; + new_req->send(); } +} - /** write **/ - - AbstractAioObjectWrite::AbstractAioObjectWrite(ImageCtx *ictx, - const std::string &oid, - uint64_t object_no, - uint64_t object_off, - uint64_t len, - const ::SnapContext &snapc, - Context *completion, - bool hide_enoent) - : AioObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, - completion, hide_enoent), - m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) - { - m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); - } +template +void AioObjectRead::read_from_parent(const Extents& parent_extents) +{ + ImageCtx *image_ctx = this->m_ictx; + assert(!m_parent_completion); + m_parent_completion = AioCompletion::create_and_start >( + this, image_ctx, AIO_TYPE_READ); + + // prevent the parent image from being deleted while this + // request is still in-progress + m_parent_completion->get(); + m_parent_completion->block(); + + ldout(image_ctx->cct, 20) << "read_from_parent this = " << this + << " parent completion " << m_parent_completion + << " extents " << parent_extents + << dendl; + RWLock::RLocker owner_locker(image_ctx->parent->owner_lock); + AioImageRequest<>::aio_read(image_ctx->parent, m_parent_completion, + parent_extents, NULL, &m_read_data, 0); +} - void AbstractAioObjectWrite::guard_write() - { - if (has_parent()) { - m_state = LIBRBD_AIO_WRITE_GUARD; - m_write.assert_exists(); - ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl; - } +/** write **/ + +AbstractAioObjectWrite::AbstractAioObjectWrite(ImageCtx *ictx, + const std::string &oid, + uint64_t object_no, + uint64_t object_off, + uint64_t len, + const ::SnapContext &snapc, + Context *completion, + bool hide_enoent) + : AioObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, + completion, hide_enoent), + m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) +{ + m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); +} + +void AbstractAioObjectWrite::guard_write() +{ + if (has_parent()) { + m_state = LIBRBD_AIO_WRITE_GUARD; + m_write.assert_exists(); + ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl; } +} - bool AbstractAioObjectWrite::should_complete(int r) - { - ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid - << " " << m_object_off << "~" << m_object_len - << " should_complete: r = " << r << dendl; - - bool finished = true; - switch (m_state) { - case LIBRBD_AIO_WRITE_PRE: - ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl; - if (r < 0) { - return true; - } +bool AbstractAioObjectWrite::should_complete(int r) +{ + ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid + << " " << m_object_off << "~" << m_object_len + << " should_complete: r = " << r << dendl; - send_write(); - finished = false; - break; + bool finished = true; + switch (m_state) { + case LIBRBD_AIO_WRITE_PRE: + ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl; + if (r < 0) { + return true; + } - case LIBRBD_AIO_WRITE_POST: - ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl; - finished = true; - break; + send_write(); + finished = false; + break; - case LIBRBD_AIO_WRITE_GUARD: - ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl; - - if (r == -ENOENT) { - handle_write_guard(); - finished = false; - break; - } else if (r < 0) { - // pass the error code to the finish context - m_state = LIBRBD_AIO_WRITE_ERROR; - complete(r); - finished = false; - break; - } + case LIBRBD_AIO_WRITE_POST: + ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl; + finished = true; + break; - finished = send_post(); - break; + case LIBRBD_AIO_WRITE_GUARD: + ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl; - case LIBRBD_AIO_WRITE_COPYUP: - ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl; - if (r < 0) { - m_state = LIBRBD_AIO_WRITE_ERROR; - complete(r); - finished = false; - } else { - finished = send_post(); - } + if (r == -ENOENT) { + handle_write_guard(); + finished = false; + break; + } else if (r < 0) { + // pass the error code to the finish context + m_state = LIBRBD_AIO_WRITE_ERROR; + complete(r); + finished = false; break; + } - case LIBRBD_AIO_WRITE_FLAT: - ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl; + finished = send_post(); + break; + case LIBRBD_AIO_WRITE_COPYUP: + ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl; + if (r < 0) { + m_state = LIBRBD_AIO_WRITE_ERROR; + complete(r); + finished = false; + } else { finished = send_post(); - break; + } + break; - case LIBRBD_AIO_WRITE_ERROR: - assert(r < 0); - lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) - << dendl; - break; + case LIBRBD_AIO_WRITE_FLAT: + ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl; - default: - lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; - assert(0); - } + finished = send_post(); + break; - return finished; - } + case LIBRBD_AIO_WRITE_ERROR: + assert(r < 0); + lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) << dendl; + break; - void AbstractAioObjectWrite::send() { - assert(m_ictx->owner_lock.is_locked()); - ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" " - << m_oid << " " << m_object_off << "~" - << m_object_len << dendl; - send_pre(); + default: + lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; + assert(0); } - void AbstractAioObjectWrite::send_pre() { - assert(m_ictx->owner_lock.is_locked()); + return finished; +} - bool write = false; - { - RWLock::RLocker snap_lock(m_ictx->snap_lock); - if (m_ictx->object_map == nullptr) { - m_object_exist = true; - write = true; +void AbstractAioObjectWrite::send() { + assert(m_ictx->owner_lock.is_locked()); + ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" " + << m_oid << " " << m_object_off << "~" + << m_object_len << dendl; + send_pre(); +} + +void AbstractAioObjectWrite::send_pre() { + assert(m_ictx->owner_lock.is_locked()); + + bool write = false; + { + RWLock::RLocker snap_lock(m_ictx->snap_lock); + if (m_ictx->object_map == nullptr) { + m_object_exist = true; + write = true; + } else { + // should have been flushed prior to releasing lock + assert(m_ictx->exclusive_lock->is_lock_owner()); + m_object_exist = m_ictx->object_map->object_may_exist(m_object_no); + + uint8_t new_state; + pre_object_map_update(&new_state); + + RWLock::WLocker object_map_locker(m_ictx->object_map_lock); + if (m_ictx->object_map->update_required(m_object_no, new_state)) { + ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << dendl; + m_state = LIBRBD_AIO_WRITE_PRE; + + Context *ctx = util::create_context_callback(this); + bool updated = m_ictx->object_map->aio_update(m_object_no, new_state, + {}, ctx); + assert(updated); } else { - // should have been flushed prior to releasing lock - assert(m_ictx->exclusive_lock->is_lock_owner()); - m_object_exist = m_ictx->object_map->object_may_exist(m_object_no); - - uint8_t new_state; - pre_object_map_update(&new_state); - - RWLock::WLocker object_map_locker(m_ictx->object_map_lock); - if (m_ictx->object_map->update_required(m_object_no, new_state)) { - ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << dendl; - m_state = LIBRBD_AIO_WRITE_PRE; - - Context *ctx = util::create_context_callback(this); - bool updated = m_ictx->object_map->aio_update(m_object_no, new_state, - {}, ctx); - assert(updated); - } else { - write = true; - } + write = true; } } + } - // avoid possible recursive lock attempts - if (write) { - // no object map update required - send_write(); - } + // avoid possible recursive lock attempts + if (write) { + // no object map update required + send_write(); } +} - bool AbstractAioObjectWrite::send_post() { - RWLock::RLocker owner_locker(m_ictx->owner_lock); - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (m_ictx->object_map == nullptr || !post_object_map_update()) { - return true; - } +bool AbstractAioObjectWrite::send_post() { + RWLock::RLocker owner_locker(m_ictx->owner_lock); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (m_ictx->object_map == nullptr || !post_object_map_update()) { + return true; + } - // should have been flushed prior to releasing lock - assert(m_ictx->exclusive_lock->is_lock_owner()); + // should have been flushed prior to releasing lock + assert(m_ictx->exclusive_lock->is_lock_owner()); - RWLock::WLocker object_map_locker(m_ictx->object_map_lock); - if (!m_ictx->object_map->update_required(m_object_no, OBJECT_NONEXISTENT)) { - return true; - } + RWLock::WLocker object_map_locker(m_ictx->object_map_lock); + if (!m_ictx->object_map->update_required(m_object_no, OBJECT_NONEXISTENT)) { + return true; + } - ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - m_state = LIBRBD_AIO_WRITE_POST; + ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + m_state = LIBRBD_AIO_WRITE_POST; - Context *ctx = util::create_context_callback(this); - bool updated = m_ictx->object_map->aio_update(m_object_no, - OBJECT_NONEXISTENT, - OBJECT_PENDING, ctx); - assert(updated); - return false; - } + Context *ctx = util::create_context_callback(this); + bool updated = m_ictx->object_map->aio_update(m_object_no, + OBJECT_NONEXISTENT, + OBJECT_PENDING, ctx); + assert(updated); + return false; +} - void AbstractAioObjectWrite::send_write() { - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << " object exist " << m_object_exist << dendl; +void AbstractAioObjectWrite::send_write() { + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << " object exist " << m_object_exist << dendl; - if (!m_object_exist && has_parent()) { - m_state = LIBRBD_AIO_WRITE_GUARD; - handle_write_guard(); - } else { - send_write_op(true); - } + if (!m_object_exist && has_parent()) { + m_state = LIBRBD_AIO_WRITE_GUARD; + handle_write_guard(); + } else { + send_write_op(true); } +} - void AbstractAioObjectWrite::send_copyup() - { - ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - m_state = LIBRBD_AIO_WRITE_COPYUP; - - m_ictx->copyup_list_lock.Lock(); - map::iterator it = - m_ictx->copyup_list.find(m_object_no); - if (it == m_ictx->copyup_list.end()) { - CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, - m_object_no, - m_parent_extents); - - // make sure to wait on this CopyupRequest - new_req->append_request(this); - m_ictx->copyup_list[m_object_no] = new_req; - - m_ictx->copyup_list_lock.Unlock(); - new_req->send(); - } else { - it->second->append_request(this); - m_ictx->copyup_list_lock.Unlock(); - } +void AbstractAioObjectWrite::send_copyup() +{ + ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + m_state = LIBRBD_AIO_WRITE_COPYUP; + + m_ictx->copyup_list_lock.Lock(); + map::iterator it = + m_ictx->copyup_list.find(m_object_no); + if (it == m_ictx->copyup_list.end()) { + CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, + m_object_no, + m_parent_extents); + + // make sure to wait on this CopyupRequest + new_req->append_request(this); + m_ictx->copyup_list[m_object_no] = new_req; + + m_ictx->copyup_list_lock.Unlock(); + new_req->send(); + } else { + it->second->append_request(this); + m_ictx->copyup_list_lock.Unlock(); } - void AbstractAioObjectWrite::send_write_op(bool write_guard) +} +void AbstractAioObjectWrite::send_write_op(bool write_guard) +{ + m_state = LIBRBD_AIO_WRITE_FLAT; + if (write_guard) + guard_write(); + add_write_ops(&m_write); + assert(m_write.size() != 0); + + librados::AioCompletion *rados_completion = + util::create_rados_safe_callback(this); + int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write, + m_snap_seq, m_snaps); + assert(r == 0); + rados_completion->release(); +} +void AbstractAioObjectWrite::handle_write_guard() +{ + bool has_parent; { - m_state = LIBRBD_AIO_WRITE_FLAT; - if (write_guard) - guard_write(); - add_write_ops(&m_write); - assert(m_write.size() != 0); - - librados::AioCompletion *rados_completion = - util::create_rados_safe_callback(this); - int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write, - m_snap_seq, m_snaps); - assert(r == 0); - rados_completion->release(); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + has_parent = compute_parent_extents(); } - void AbstractAioObjectWrite::handle_write_guard() - { - bool has_parent; - { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - has_parent = compute_parent_extents(); - } - // If parent still exists, overlap might also have changed. - if (has_parent) { - send_copyup(); - } else { - // parent may have disappeared -- send original write again - ldout(m_ictx->cct, 20) << "should_complete(" << this - << "): parent overlap now 0" << dendl; - send_write(); - } + // If parent still exists, overlap might also have changed. + if (has_parent) { + send_copyup(); + } else { + // parent may have disappeared -- send original write again + ldout(m_ictx->cct, 20) << "should_complete(" << this + << "): parent overlap now 0" << dendl; + send_write(); } +} - void AioObjectWrite::add_write_ops(librados::ObjectWriteOperation *wr) { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (m_ictx->enable_alloc_hint && - (m_ictx->object_map == nullptr || - !m_object_exist)) { - wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); - } - - if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) { - wr->write_full(m_write_data); - } else { - wr->write(m_object_off, m_write_data); - } - wr->set_op_flags2(m_op_flags); +void AioObjectWrite::add_write_ops(librados::ObjectWriteOperation *wr) { + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (m_ictx->enable_alloc_hint && + (m_ictx->object_map == nullptr || !m_object_exist)) { + wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); } - void AioObjectWrite::send_write() { - bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size()); - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << " object exist " << m_object_exist - << " write_full " << write_full << dendl; - if (write_full && !has_parent()) { - send_write_op(false); - } else { - AbstractAioObjectWrite::send_write(); - } + if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) { + wr->write_full(m_write_data); + } else { + wr->write(m_object_off, m_write_data); } + wr->set_op_flags2(m_op_flags); +} - void AioObjectRemove::guard_write() { - // do nothing to disable write guard only if deep-copyup not required - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (!m_ictx->snaps.empty()) { - AbstractAioObjectWrite::guard_write(); - } +void AioObjectWrite::send_write() { + bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size()); + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << " object exist " << m_object_exist + << " write_full " << write_full << dendl; + if (write_full && !has_parent()) { + send_write_op(false); + } else { + AbstractAioObjectWrite::send_write(); } - void AioObjectRemove::send_write() { - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - send_write_op(true); +} + +void AioObjectRemove::guard_write() { + // do nothing to disable write guard only if deep-copyup not required + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (!m_ictx->snaps.empty()) { + AbstractAioObjectWrite::guard_write(); } - void AioObjectTruncate::send_write() { - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid - << " truncate " << m_object_off << dendl; - if (!m_object_exist && ! has_parent()) { - m_state = LIBRBD_AIO_WRITE_FLAT; - Context *ctx = util::create_context_callback(this); - m_ictx->op_work_queue->queue(ctx, 0); - } else { - AbstractAioObjectWrite::send_write(); - } +} +void AioObjectRemove::send_write() { + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + send_write_op(true); +} +void AioObjectTruncate::send_write() { + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid + << " truncate " << m_object_off << dendl; + if (!m_object_exist && ! has_parent()) { + m_state = LIBRBD_AIO_WRITE_FLAT; + Context *ctx = util::create_context_callback(this); + m_ictx->op_work_queue->queue(ctx, 0); + } else { + AbstractAioObjectWrite::send_write(); } } + +} // namespace librbd + +template class librbd::AioObjectRequest; +template class librbd::AioObjectRead; diff --git a/src/librbd/AioObjectRequest.h b/src/librbd/AioObjectRequest.h index 9647a3e698661..068ce1a846dc8 100644 --- a/src/librbd/AioObjectRequest.h +++ b/src/librbd/AioObjectRequest.h @@ -1,7 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_AIOREQUEST_H -#define CEPH_LIBRBD_AIOREQUEST_H + +#ifndef CEPH_LIBRBD_AIO_OBJECT_REQUEST_H +#define CEPH_LIBRBD_AIO_OBJECT_REQUEST_H #include "include/int_types.h" @@ -16,339 +17,397 @@ class Context; namespace librbd { - struct AioCompletion; - struct ImageCtx; - class CopyupRequest; +struct AioCompletion; +class AioObjectRemove; +class AioObjectTruncate; +class AioObjectWrite; +class AioObjectZero; +struct ImageCtx; +class CopyupRequest; + +struct AioObjectRequestHandle { + virtual ~AioObjectRequestHandle() { + } + + virtual void complete(int r) = 0; + virtual void send() = 0; +}; + +/** + * This class represents an I/O operation to a single RBD data object. + * Its subclasses encapsulate logic for dealing with special cases + * for I/O due to layering. + */ +template +class AioObjectRequest : public AioObjectRequestHandle { +public: + typedef std::vector > Extents; + + static AioObjectRequest* create_remove(ImageCtxT *ictx, + const std::string &oid, + uint64_t object_no, + const ::SnapContext &snapc, + Context *completion); + static AioObjectRequest* create_truncate(ImageCtxT *ictx, + const std::string &oid, + uint64_t object_no, + uint64_t object_off, + const ::SnapContext &snapc, + Context *completion); + static AioObjectRequest* create_write(ImageCtxT *ictx, const std::string &oid, + uint64_t object_no, + uint64_t object_off, + const ceph::bufferlist &data, + const ::SnapContext &snapc, + Context *completion, int op_flags); + static AioObjectRequest* create_zero(ImageCtxT *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + uint64_t object_len, + const ::SnapContext &snapc, + Context *completion); + + AioObjectRequest(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t off, uint64_t len, + librados::snap_t snap_id, + Context *completion, bool hide_enoent); + virtual ~AioObjectRequest() {} + + virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {}; + + void complete(int r); + + virtual bool should_complete(int r) = 0; + virtual void send() = 0; + + bool has_parent() const { + return !m_parent_extents.empty(); + } + +protected: + bool compute_parent_extents(); + + ImageCtx *m_ictx; + std::string m_oid; + uint64_t m_object_no, m_object_off, m_object_len; + librados::snap_t m_snap_id; + Context *m_completion; + Extents m_parent_extents; + bool m_hide_enoent; +}; + +template +class AioObjectRead : public AioObjectRequest { +public: + typedef std::vector > Extents; + typedef std::map ExtentMap; + + static AioObjectRead* create(ImageCtxT *ictx, const std::string &oid, + uint64_t objectno, uint64_t offset, + uint64_t len, Extents &buffer_extents, + librados::snap_t snap_id, bool sparse, + Context *completion, int op_flags) { + return new AioObjectRead(ictx, oid, objectno, offset, len, buffer_extents, + snap_id, sparse, completion, op_flags); + } + + AioObjectRead(ImageCtxT *ictx, const std::string &oid, + uint64_t objectno, uint64_t offset, uint64_t len, + Extents& buffer_extents, librados::snap_t snap_id, bool sparse, + Context *completion, int op_flags); + + virtual bool should_complete(int r); + virtual void send(); + void guard_read(); + + inline uint64_t get_offset() const { + return this->m_object_off; + } + inline uint64_t get_length() const { + return this->m_object_len; + } + ceph::bufferlist &data() { + return m_read_data; + } + const Extents &get_buffer_extents() const { + return m_buffer_extents; + } + ExtentMap &get_extent_map() { + return m_ext_map; + } +private: + Extents m_buffer_extents; + bool m_tried_parent; + bool m_sparse; + int m_op_flags; + ceph::bufferlist m_read_data; + AioCompletion *m_parent_completion; + ExtentMap m_ext_map; /** - * This class represents an I/O operation to a single RBD data object. - * Its subclasses encapsulate logic for dealing with special cases - * for I/O due to layering. + * Reads go through the following state machine to deal with + * layering: + * + * need copyup + * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP + * | | + * v | + * done <------------------------------------/ + * ^ + * | + * LIBRBD_AIO_READ_FLAT + * + * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on + * whether there is a parent or not. */ - class AioObjectRequest - { - public: - AioObjectRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t off, uint64_t len, - librados::snap_t snap_id, - Context *completion, bool hide_enoent); - virtual ~AioObjectRequest() {} - - virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {}; - - void complete(int r); - - virtual bool should_complete(int r) = 0; - virtual void send() = 0; - - bool has_parent() const { - return !m_parent_extents.empty(); - } - - protected: - bool compute_parent_extents(); - - ImageCtx *m_ictx; - std::string m_oid; - uint64_t m_object_no, m_object_off, m_object_len; - librados::snap_t m_snap_id; - Context *m_completion; - std::vector > m_parent_extents; - bool m_hide_enoent; + enum read_state_d { + LIBRBD_AIO_READ_GUARD, + LIBRBD_AIO_READ_COPYUP, + LIBRBD_AIO_READ_FLAT }; - class AioObjectRead : public AioObjectRequest { - public: - AioObjectRead(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t offset, uint64_t len, - vector >& be, - librados::snap_t snap_id, bool sparse, - Context *completion, int op_flags); - - virtual bool should_complete(int r); - virtual void send(); - void guard_read(); - - ceph::bufferlist &data() { - return m_read_data; - } - - std::map m_ext_map; - - friend class C_AioRead; - - private: - vector > m_buffer_extents; - bool m_tried_parent; - bool m_sparse; - int m_op_flags; - ceph::bufferlist m_read_data; - AioCompletion *m_parent_completion; - - /** - * Reads go through the following state machine to deal with - * layering: - * - * need copyup - * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP - * | | - * v | - * done <------------------------------------/ - * ^ - * | - * LIBRBD_AIO_READ_FLAT - * - * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on - * whether there is a parent or not. - */ - enum read_state_d { - LIBRBD_AIO_READ_GUARD, - LIBRBD_AIO_READ_COPYUP, - LIBRBD_AIO_READ_FLAT - }; - - read_state_d m_state; - - void send_copyup(); - - void read_from_parent(const vector >& image_extents); - }; - - class AbstractAioObjectWrite : public AioObjectRequest { - public: - AbstractAioObjectWrite(ImageCtx *ictx, const std::string &oid, - uint64_t object_no, uint64_t object_off, - uint64_t len, const ::SnapContext &snapc, - Context *completion, bool hide_enoent); - - virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) - { - add_write_ops(wr); - } + read_state_d m_state; - virtual bool should_complete(int r); - virtual void send(); - - /** - * Writes go through the following state machine to deal with - * layering and the object map: - * - * - * . | - * . | - * . \---> LIBRBD_AIO_WRITE_PRE - * . | | - * . . . . . . | . . . . | . . . . . . . . . . . - * . | -or- | . - * . | | v - * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . . - * . | | . - * v v need copyup | . - * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | . - * . | | . | . - * . | | . | . - * . | /-----/ . | . - * . | | . | . - * . \-------------------\ | /-------------------/ . - * . | | | . . - * . v v v . . - * . LIBRBD_AIO_WRITE_POST . . - * . | . . - * . | . . . . . . . . . - * . | . . - * . v v . - * . . . . . . . . . . . . . . > < . . . . . . . . . . . . . . - * - * The _PRE/_POST states are skipped if the object map is disabled. - * The write starts in _WRITE_GUARD or _FLAT depending on whether or not - * there is a parent overlap. - */ - protected: - enum write_state_d { - LIBRBD_AIO_WRITE_GUARD, - LIBRBD_AIO_WRITE_COPYUP, - LIBRBD_AIO_WRITE_FLAT, - LIBRBD_AIO_WRITE_PRE, - LIBRBD_AIO_WRITE_POST, - LIBRBD_AIO_WRITE_ERROR - }; - - write_state_d m_state; - librados::ObjectWriteOperation m_write; - uint64_t m_snap_seq; - std::vector m_snaps; - bool m_object_exist; - - virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0; - virtual const char* get_write_type() const = 0; - virtual void guard_write(); - virtual void pre_object_map_update(uint8_t *new_state) = 0; - virtual bool post_object_map_update() { - return false; - } - virtual void send_write(); - virtual void send_write_op(bool write_guard); - virtual void handle_write_guard(); - - private: - void send_pre(); - bool send_post(); - void send_copyup(); - }; - - class AioObjectWrite : public AbstractAioObjectWrite { - public: - AioObjectWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - uint64_t object_off, const ceph::bufferlist &data, - const ::SnapContext &snapc, Context *completion) - : AbstractAioObjectWrite(ictx, oid, object_no, object_off, data.length(), - snapc, completion, false), - m_write_data(data), m_op_flags(0) { - } - - void set_op_flags(int op_flags) { - m_op_flags = op_flags; - } - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr); - - virtual const char* get_write_type() const { - return "write"; - } - - virtual void pre_object_map_update(uint8_t *new_state) { - *new_state = OBJECT_EXISTS; - } - virtual void send_write(); - - private: - ceph::bufferlist m_write_data; - int m_op_flags; - }; - - class AioObjectRemove : public AbstractAioObjectWrite { - public: - AioObjectRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - const ::SnapContext &snapc, Context *completion) - : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, - true), - m_object_state(OBJECT_NONEXISTENT) { - } + void send_copyup(); - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { - if (has_parent()) { - wr->truncate(0); - } else { - wr->remove(); - } - } + void read_from_parent(const Extents& image_extents); +}; - virtual const char* get_write_type() const { - if (has_parent()) { - return "remove (trunc)"; - } - return "remove"; - } - virtual void pre_object_map_update(uint8_t *new_state) { - if (has_parent()) { - m_object_state = OBJECT_EXISTS; - } else { - m_object_state = OBJECT_PENDING; - } - *new_state = m_object_state; - } +class AbstractAioObjectWrite : public AioObjectRequest<> { +public: + AbstractAioObjectWrite(ImageCtx *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + uint64_t len, const ::SnapContext &snapc, + Context *completion, bool hide_enoent); - virtual bool post_object_map_update() { - if (m_object_state == OBJECT_EXISTS) { - return false; - } - return true; - } + virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) + { + add_write_ops(wr); + } - virtual void guard_write(); - virtual void send_write(); + virtual bool should_complete(int r); + virtual void send(); - private: - uint8_t m_object_state; + /** + * Writes go through the following state machine to deal with + * layering and the object map: + * + * + * . | + * . | + * . \---> LIBRBD_AIO_WRITE_PRE + * . | | + * . . . . . . | . . . . | . . . . . . . . . . . + * . | -or- | . + * . | | v + * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . . + * . | | . + * v v need copyup | . + * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | . + * . | | . | . + * . | | . | . + * . | /-----/ . | . + * . | | . | . + * . \-------------------\ | /-------------------/ . + * . | | | . . + * . v v v . . + * . LIBRBD_AIO_WRITE_POST . . + * . | . . + * . | . . . . . . . . . + * . | . . + * . v v . + * . . . . . . . . . . . . . . > < . . . . . . . . . . . . . . + * + * The _PRE/_POST states are skipped if the object map is disabled. + * The write starts in _WRITE_GUARD or _FLAT depending on whether or not + * there is a parent overlap. + */ +protected: + enum write_state_d { + LIBRBD_AIO_WRITE_GUARD, + LIBRBD_AIO_WRITE_COPYUP, + LIBRBD_AIO_WRITE_FLAT, + LIBRBD_AIO_WRITE_PRE, + LIBRBD_AIO_WRITE_POST, + LIBRBD_AIO_WRITE_ERROR }; - class AioObjectTrim : public AbstractAioObjectWrite { - public: - AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + write_state_d m_state; + librados::ObjectWriteOperation m_write; + uint64_t m_snap_seq; + std::vector m_snaps; + bool m_object_exist; + + virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0; + virtual const char* get_write_type() const = 0; + virtual void guard_write(); + virtual void pre_object_map_update(uint8_t *new_state) = 0; + virtual bool post_object_map_update() { + return false; + } + virtual void send_write(); + virtual void send_write_op(bool write_guard); + virtual void handle_write_guard(); + +private: + void send_pre(); + bool send_post(); + void send_copyup(); +}; + +class AioObjectWrite : public AbstractAioObjectWrite { +public: + AioObjectWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + uint64_t object_off, const ceph::bufferlist &data, + const ::SnapContext &snapc, Context *completion, + int op_flags) + : AbstractAioObjectWrite(ictx, oid, object_no, object_off, data.length(), + snapc, completion, false), + m_write_data(data), m_op_flags(op_flags) { + } + +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr); + + virtual const char* get_write_type() const { + return "write"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_EXISTS; + } + virtual void send_write(); + +private: + ceph::bufferlist m_write_data; + int m_op_flags; +}; + +class AioObjectRemove : public AbstractAioObjectWrite { +public: + AioObjectRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no, const ::SnapContext &snapc, Context *completion) - : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, - true) { - } - - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, + true), + m_object_state(OBJECT_NONEXISTENT) { + } + +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + if (has_parent()) { + wr->truncate(0); + } else { wr->remove(); } + } - virtual const char* get_write_type() const { - return "remove (trim)"; + virtual const char* get_write_type() const { + if (has_parent()) { + return "remove (trunc)"; } - - virtual void pre_object_map_update(uint8_t *new_state) { - *new_state = OBJECT_PENDING; - } - - virtual bool post_object_map_update() { - return true; - } - }; - - class AioObjectTruncate : public AbstractAioObjectWrite { - public: - AioObjectTruncate(ImageCtx *ictx, const std::string &oid, - uint64_t object_no, uint64_t object_off, - const ::SnapContext &snapc, Context *completion) - : AbstractAioObjectWrite(ictx, oid, object_no, object_off, 0, snapc, - completion, true) { - } - - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { - wr->truncate(m_object_off); + return "remove"; + } + virtual void pre_object_map_update(uint8_t *new_state) { + if (has_parent()) { + m_object_state = OBJECT_EXISTS; + } else { + m_object_state = OBJECT_PENDING; } + *new_state = m_object_state; + } - virtual const char* get_write_type() const { - return "truncate"; - } - - virtual void pre_object_map_update(uint8_t *new_state) { - if (!m_object_exist && !has_parent()) - *new_state = OBJECT_NONEXISTENT; - else - *new_state = OBJECT_EXISTS; - } - virtual void send_write(); - }; - - class AioObjectZero : public AbstractAioObjectWrite { - public: - AioObjectZero(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - uint64_t object_off, uint64_t object_len, - const ::SnapContext &snapc, Context *completion) - : AbstractAioObjectWrite(ictx, oid, object_no, object_off, object_len, - snapc, completion, true) { - } - - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { - wr->zero(m_object_off, m_object_len); - } - - virtual const char* get_write_type() const { - return "zero"; + virtual bool post_object_map_update() { + if (m_object_state == OBJECT_EXISTS) { + return false; } - - virtual void pre_object_map_update(uint8_t *new_state) { + return true; + } + + virtual void guard_write(); + virtual void send_write(); + +private: + uint8_t m_object_state; +}; + +class AioObjectTrim : public AbstractAioObjectWrite { +public: + AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, + true) { + } + +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + wr->remove(); + } + + virtual const char* get_write_type() const { + return "remove (trim)"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_PENDING; + } + + virtual bool post_object_map_update() { + return true; + } +}; + +class AioObjectTruncate : public AbstractAioObjectWrite { +public: + AioObjectTruncate(ImageCtx *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, object_off, 0, snapc, + completion, true) { + } + +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + wr->truncate(m_object_off); + } + + virtual const char* get_write_type() const { + return "truncate"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + if (!m_object_exist && !has_parent()) + *new_state = OBJECT_NONEXISTENT; + else *new_state = OBJECT_EXISTS; - } - }; - -} - -#endif + } + virtual void send_write(); +}; + +class AioObjectZero : public AbstractAioObjectWrite { +public: + AioObjectZero(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + uint64_t object_off, uint64_t object_len, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, object_off, object_len, + snapc, completion, true) { + } + +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + wr->zero(m_object_off, m_object_len); + } + + virtual const char* get_write_type() const { + return "zero"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_EXISTS; + } +}; + +} // namespace librbd + +extern template class librbd::AioObjectRequest; +extern template class librbd::AioObjectRead; + +#endif // CEPH_LIBRBD_AIO_OBJECT_REQUEST_H diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc index da7c43ae286a0..b95544b28494a 100644 --- a/src/librbd/CopyupRequest.cc +++ b/src/librbd/CopyupRequest.cc @@ -78,228 +78,229 @@ class UpdateObjectMap : public C_AsyncObjectThrottle<> { } // anonymous namespace - CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, - vector >& image_extents) - : m_ictx(ictx), m_oid(oid), m_object_no(objectno), - m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT) - { - m_async_op.start_op(*m_ictx); - } +CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, + vector >& image_extents) + : m_ictx(ictx), m_oid(oid), m_object_no(objectno), + m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT) +{ + m_async_op.start_op(*m_ictx); +} - CopyupRequest::~CopyupRequest() { - assert(m_pending_requests.empty()); - m_async_op.finish_op(); - } +CopyupRequest::~CopyupRequest() { + assert(m_pending_requests.empty()); + m_async_op.finish_op(); +} + +void CopyupRequest::append_request(AioObjectRequest<> *req) { + ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl; + m_pending_requests.push_back(req); +} - void CopyupRequest::append_request(AioObjectRequest *req) { - ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl; - m_pending_requests.push_back(req); +void CopyupRequest::complete_requests(int r) { + while (!m_pending_requests.empty()) { + vector *>::iterator it = m_pending_requests.begin(); + AioObjectRequest<> *req = *it; + ldout(m_ictx->cct, 20) << __func__ << " completing request " << req + << dendl; + req->complete(r); + m_pending_requests.erase(it); } +} - void CopyupRequest::complete_requests(int r) { - while (!m_pending_requests.empty()) { - vector::iterator it = m_pending_requests.begin(); - AioObjectRequest *req = *it; - ldout(m_ictx->cct, 20) << __func__ << " completing request " << req - << dendl; - req->complete(r); - m_pending_requests.erase(it); - } +bool CopyupRequest::send_copyup() { + bool add_copyup_op = !m_copyup_data.is_zero(); + bool copy_on_read = m_pending_requests.empty(); + if (!add_copyup_op && copy_on_read) { + // copyup empty object to prevent future CoR attempts + m_copyup_data.clear(); + add_copyup_op = true; } - bool CopyupRequest::send_copyup() { - bool add_copyup_op = !m_copyup_data.is_zero(); - bool copy_on_read = m_pending_requests.empty(); - if (!add_copyup_op && copy_on_read) { - // copyup empty object to prevent future CoR attempts - m_copyup_data.clear(); - add_copyup_op = true; - } + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": oid " << m_oid << dendl; + m_state = STATE_COPYUP; - ldout(m_ictx->cct, 20) << __func__ << " " << this - << ": oid " << m_oid << dendl; - m_state = STATE_COPYUP; + m_ictx->snap_lock.get_read(); + ::SnapContext snapc = m_ictx->snapc; + m_ictx->snap_lock.put_read(); - m_ictx->snap_lock.get_read(); - ::SnapContext snapc = m_ictx->snapc; - m_ictx->snap_lock.put_read(); + std::vector snaps; - std::vector snaps; + if (!copy_on_read) { + m_pending_copyups.inc(); + } - if (!copy_on_read) { - m_pending_copyups.inc(); - } + int r; + if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) { + assert(add_copyup_op); + add_copyup_op = false; + + librados::ObjectWriteOperation copyup_op; + copyup_op.exec("rbd", "copyup", m_copyup_data); + + // send only the copyup request with a blank snapshot context so that + // all snapshots are detected from the parent for this object. If + // this is a CoW request, a second request will be created for the + // actual modification. + m_pending_copyups.inc(); + + ldout(m_ictx->cct, 20) << __func__ << " " << this << " copyup with " + << "empty snapshot context" << dendl; + librados::AioCompletion *comp = util::create_rados_safe_callback(this); + r = m_ictx->md_ctx.aio_operate(m_oid, comp, ©up_op, 0, snaps); + assert(r == 0); + comp->release(); + } - int r; - if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) { - assert(add_copyup_op); - add_copyup_op = false; - - librados::ObjectWriteOperation copyup_op; - copyup_op.exec("rbd", "copyup", m_copyup_data); - - // send only the copyup request with a blank snapshot context so that - // all snapshots are detected from the parent for this object. If - // this is a CoW request, a second request will be created for the - // actual modification. - m_pending_copyups.inc(); - - ldout(m_ictx->cct, 20) << __func__ << " " << this << " copyup with " - << "empty snapshot context" << dendl; - librados::AioCompletion *comp = util::create_rados_safe_callback(this); - r = m_ictx->md_ctx.aio_operate(m_oid, comp, ©up_op, 0, snaps); - assert(r == 0); - comp->release(); + if (!copy_on_read) { + librados::ObjectWriteOperation write_op; + if (add_copyup_op) { + // CoW did not need to handle existing snapshots + write_op.exec("rbd", "copyup", m_copyup_data); } - if (!copy_on_read) { - librados::ObjectWriteOperation write_op; - if (add_copyup_op) { - // CoW did not need to handle existing snapshots - write_op.exec("rbd", "copyup", m_copyup_data); - } - - // merge all pending write ops into this single RADOS op - for (size_t i=0; icct, 20) << __func__ << " add_copyup_ops " << req - << dendl; - req->add_copyup_ops(&write_op); - } - assert(write_op.size() != 0); - - snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); - librados::AioCompletion *comp = util::create_rados_safe_callback(this); - r = m_ictx->data_ctx.aio_operate(m_oid, comp, &write_op); - assert(r == 0); - comp->release(); + // merge all pending write ops into this single RADOS op + for (size_t i=0; i *req = m_pending_requests[i]; + ldout(m_ictx->cct, 20) << __func__ << " add_copyup_ops " << req + << dendl; + req->add_copyup_ops(&write_op); } - return false; + assert(write_op.size() != 0); + + snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); + librados::AioCompletion *comp = util::create_rados_safe_callback(this); + r = m_ictx->data_ctx.aio_operate(m_oid, comp, &write_op); + assert(r == 0); + comp->release(); } + return false; +} - void CopyupRequest::send() - { - m_state = STATE_READ_FROM_PARENT; - AioCompletion *comp = AioCompletion::create_and_start( - this, m_ictx, AIO_TYPE_READ); +void CopyupRequest::send() +{ + m_state = STATE_READ_FROM_PARENT; + AioCompletion *comp = AioCompletion::create_and_start( + this, m_ictx, AIO_TYPE_READ); + + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": completion " << comp + << ", oid " << m_oid + << ", extents " << m_image_extents + << dendl; + RWLock::RLocker owner_locker(m_ictx->parent->owner_lock); + AioImageRequest<>::aio_read(m_ictx->parent, comp, m_image_extents, NULL, + &m_copyup_data, 0); +} - ldout(m_ictx->cct, 20) << __func__ << " " << this - << ": completion " << comp - << ", oid " << m_oid - << ", extents " << m_image_extents - << dendl; - RWLock::RLocker owner_locker(m_ictx->parent->owner_lock); - AioImageRequest<>::aio_read(m_ictx->parent, comp, m_image_extents, NULL, - &m_copyup_data, 0); +void CopyupRequest::complete(int r) +{ + if (should_complete(r)) { + complete_requests(r); + delete this; } +} - void CopyupRequest::complete(int r) - { - if (should_complete(r)) { - complete_requests(r); - delete this; +bool CopyupRequest::should_complete(int r) +{ + CephContext *cct = m_ictx->cct; + ldout(cct, 20) << __func__ << " " << this + << ": oid " << m_oid + << ", extents " << m_image_extents + << ", r " << r << dendl; + + uint64_t pending_copyups; + switch (m_state) { + case STATE_READ_FROM_PARENT: + ldout(cct, 20) << "READ_FROM_PARENT" << dendl; + remove_from_list(); + if (r >= 0 || r == -ENOENT) { + return send_object_map(); } - } - - bool CopyupRequest::should_complete(int r) - { - CephContext *cct = m_ictx->cct; - ldout(cct, 20) << __func__ << " " << this - << ": oid " << m_oid - << ", extents " << m_image_extents - << ", r " << r << dendl; - - uint64_t pending_copyups; - switch (m_state) { - case STATE_READ_FROM_PARENT: - ldout(cct, 20) << "READ_FROM_PARENT" << dendl; - remove_from_list(); - if (r >= 0 || r == -ENOENT) { - return send_object_map(); - } - break; - - case STATE_OBJECT_MAP: - ldout(cct, 20) << "OBJECT_MAP" << dendl; - assert(r == 0); - return send_copyup(); - - case STATE_COPYUP: - // invoked via a finisher in librados, so thread safe - pending_copyups = m_pending_copyups.dec(); - ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)" - << dendl; - if (r == -ENOENT) { - // hide the -ENOENT error if this is the last op - if (pending_copyups == 0) { - complete_requests(0); - } - } else if (r < 0) { - complete_requests(r); + break; + + case STATE_OBJECT_MAP: + ldout(cct, 20) << "OBJECT_MAP" << dendl; + assert(r == 0); + return send_copyup(); + + case STATE_COPYUP: + // invoked via a finisher in librados, so thread safe + pending_copyups = m_pending_copyups.dec(); + ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)" + << dendl; + if (r == -ENOENT) { + // hide the -ENOENT error if this is the last op + if (pending_copyups == 0) { + complete_requests(0); } - return (pending_copyups == 0); - - default: - lderr(cct) << "invalid state: " << m_state << dendl; - assert(false); - break; + } else if (r < 0) { + complete_requests(r); } - return (r < 0); + return (pending_copyups == 0); + + default: + lderr(cct) << "invalid state: " << m_state << dendl; + assert(false); + break; } + return (r < 0); +} - void CopyupRequest::remove_from_list() - { - Mutex::Locker l(m_ictx->copyup_list_lock); +void CopyupRequest::remove_from_list() +{ + Mutex::Locker l(m_ictx->copyup_list_lock); - map::iterator it = - m_ictx->copyup_list.find(m_object_no); - assert(it != m_ictx->copyup_list.end()); - m_ictx->copyup_list.erase(it); - } + map::iterator it = + m_ictx->copyup_list.find(m_object_no); + assert(it != m_ictx->copyup_list.end()); + m_ictx->copyup_list.erase(it); +} - bool CopyupRequest::send_object_map() { - { - RWLock::RLocker owner_locker(m_ictx->owner_lock); - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (m_ictx->object_map != nullptr) { - bool copy_on_read = m_pending_requests.empty(); - assert(m_ictx->exclusive_lock->is_lock_owner()); - - RWLock::WLocker object_map_locker(m_ictx->object_map_lock); - if (copy_on_read && - (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) { - // CoW already updates the HEAD object map - m_snap_ids.push_back(CEPH_NOSNAP); - } - if (!m_ictx->snaps.empty()) { - m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(), - m_ictx->snaps.end()); - } +bool CopyupRequest::send_object_map() { + { + RWLock::RLocker owner_locker(m_ictx->owner_lock); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (m_ictx->object_map != nullptr) { + bool copy_on_read = m_pending_requests.empty(); + assert(m_ictx->exclusive_lock->is_lock_owner()); + + RWLock::WLocker object_map_locker(m_ictx->object_map_lock); + if (copy_on_read && + (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) { + // CoW already updates the HEAD object map + m_snap_ids.push_back(CEPH_NOSNAP); + } + if (!m_ictx->snaps.empty()) { + m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(), + m_ictx->snaps.end()); } } + } - // avoid possible recursive lock attempts - if (m_snap_ids.empty()) { - // no object map update required - return send_copyup(); - } else { - // update object maps for HEAD and all existing snapshots - ldout(m_ictx->cct, 20) << __func__ << " " << this - << ": oid " << m_oid - << dendl; - m_state = STATE_OBJECT_MAP; - - RWLock::RLocker owner_locker(m_ictx->owner_lock); - AsyncObjectThrottle<>::ContextFactory context_factory( - boost::lambda::bind(boost::lambda::new_ptr(), - boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids, - boost::lambda::_2)); - AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( - NULL, *m_ictx, context_factory, util::create_context_callback(this), - NULL, 0, m_snap_ids.size()); - throttle->start_ops(m_ictx->concurrent_management_ops); - } - return false; + // avoid possible recursive lock attempts + if (m_snap_ids.empty()) { + // no object map update required + return send_copyup(); + } else { + // update object maps for HEAD and all existing snapshots + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": oid " << m_oid + << dendl; + m_state = STATE_OBJECT_MAP; + + RWLock::RLocker owner_locker(m_ictx->owner_lock); + AsyncObjectThrottle<>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr(), + boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids, + boost::lambda::_2)); + AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( + NULL, *m_ictx, context_factory, util::create_context_callback(this), + NULL, 0, m_snap_ids.size()); + throttle->start_ops(m_ictx->concurrent_management_ops); } + return false; } + +} // namespace librbd diff --git a/src/librbd/CopyupRequest.h b/src/librbd/CopyupRequest.h index 81cdc5b117ad1..62787681112b9 100644 --- a/src/librbd/CopyupRequest.h +++ b/src/librbd/CopyupRequest.h @@ -1,7 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_COPYUPREQUEST_H -#define CEPH_LIBRBD_COPYUPREQUEST_H + +#ifndef CEPH_LIBRBD_COPYUP_REQUEST_H +#define CEPH_LIBRBD_COPYUP_REQUEST_H #include "librbd/AsyncOperation.h" #include "include/int_types.h" @@ -9,76 +10,79 @@ namespace librbd { - struct AioCompletion; - - class CopyupRequest { - public: - CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno, - vector >& image_extents); - ~CopyupRequest(); - - void append_request(AioObjectRequest *req); - - void send(); - - void complete(int r); - - private: - /** - * Copyup requests go through the following state machine to read from the - * parent image, update the object map, and copyup the object: - * - * - * @verbatim - * - * - * | - * v - * STATE_READ_FROM_PARENT - * . . | - * . . v - * . . STATE_OBJECT_MAP . . - * . . | . - * . . v . - * . . . > STATE_COPYUP . - * . | . - * . v . - * . . . . > < . . . . . - * - * @endverbatim - * - * The _OBJECT_MAP state is skipped if the object map isn't enabled or if - * an object map update isn't required. The _COPYUP state is skipped if - * no data was read from the parent *and* there are no additional ops. - */ - enum State { - STATE_READ_FROM_PARENT, - STATE_OBJECT_MAP, - STATE_COPYUP - }; - - ImageCtx *m_ictx; - std::string m_oid; - uint64_t m_object_no; - vector > m_image_extents; - State m_state; - ceph::bufferlist m_copyup_data; - vector m_pending_requests; - atomic_t m_pending_copyups; - - AsyncOperation m_async_op; - - std::vector m_snap_ids; - - void complete_requests(int r); - - bool should_complete(int r); - - void remove_from_list(); - - bool send_object_map(); - bool send_copyup(); +struct AioCompletion; +template class AioObjectRequest; +struct ImageCtx; + +class CopyupRequest { +public: + CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno, + vector >& image_extents); + ~CopyupRequest(); + + void append_request(AioObjectRequest *req); + + void send(); + + void complete(int r); + +private: + /** + * Copyup requests go through the following state machine to read from the + * parent image, update the object map, and copyup the object: + * + * + * @verbatim + * + * + * | + * v + * STATE_READ_FROM_PARENT + * . . | + * . . v + * . . STATE_OBJECT_MAP . . + * . . | . + * . . v . + * . . . > STATE_COPYUP . + * . | . + * . v . + * . . . . > < . . . . . + * + * @endverbatim + * + * The _OBJECT_MAP state is skipped if the object map isn't enabled or if + * an object map update isn't required. The _COPYUP state is skipped if + * no data was read from the parent *and* there are no additional ops. + */ + enum State { + STATE_READ_FROM_PARENT, + STATE_OBJECT_MAP, + STATE_COPYUP }; -} -#endif + ImageCtx *m_ictx; + std::string m_oid; + uint64_t m_object_no; + vector > m_image_extents; + State m_state; + ceph::bufferlist m_copyup_data; + vector *> m_pending_requests; + atomic_t m_pending_copyups; + + AsyncOperation m_async_op; + + std::vector m_snap_ids; + + void complete_requests(int r); + + bool should_complete(int r); + + void remove_from_list(); + + bool send_object_map(); + bool send_copyup(); +}; + +} // namespace librbd + +#endif // CEPH_LIBRBD_COPYUP_REQUEST_H diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc index 671553f5b830d..f98977bac6dd9 100644 --- a/src/librbd/Journal.cc +++ b/src/librbd/Journal.cc @@ -10,6 +10,7 @@ #include "librbd/Utils.h" #include "cls/journal/cls_journal_types.h" #include "journal/Journaler.h" +#include "journal/Policy.h" #include "journal/ReplayEntry.h" #include "journal/Settings.h" #include "common/errno.h" @@ -628,6 +629,14 @@ bool Journal::is_journal_replaying() const { m_state == STATE_RESTARTING_REPLAY); } +template +bool Journal::is_journal_appending() const { + assert(m_image_ctx.snap_lock.is_locked()); + Mutex::Locker locker(m_lock); + return (m_state == STATE_READY && + !m_image_ctx.get_journal_policy()->append_disabled()); +} + template void Journal::wait_for_journal_ready(Context *on_ready) { on_ready = create_async_context_callback(m_image_ctx, on_ready); diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h index 496c8819f9636..ec3b328d88d40 100644 --- a/src/librbd/Journal.h +++ b/src/librbd/Journal.h @@ -31,7 +31,7 @@ namespace librados { namespace librbd { -class AioObjectRequest; +struct AioObjectRequestHandle; class ImageCtx; namespace journal { template class Replay; } @@ -87,7 +87,7 @@ class Journal { static const std::string LOCAL_MIRROR_UUID; static const std::string ORPHAN_MIRROR_UUID; - typedef std::list AioObjectRequests; + typedef std::list AioObjectRequests; Journal(ImageCtxT &image_ctx); ~Journal(); @@ -111,6 +111,7 @@ class Journal { bool is_journal_ready() const; bool is_journal_replaying() const; + bool is_journal_appending() const; void wait_for_journal_ready(Context *on_ready); diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc index e3ba517849c23..977b0b3163d75 100644 --- a/src/librbd/LibrbdWriteback.cc +++ b/src/librbd/LibrbdWriteback.cc @@ -162,7 +162,7 @@ namespace librbd { request_sent = true; AioObjectWrite *req = new AioObjectWrite(image_ctx, oid, object_no, off, - bl, snapc, this); + bl, snapc, this, 0); req->send(); } }; @@ -274,7 +274,7 @@ namespace librbd { journal_tid)); } else { AioObjectWrite *req = new AioObjectWrite(m_ictx, oid.name, object_no, - off, bl, snapc, req_comp); + off, bl, snapc, req_comp, 0); req->send(); } return ++m_tid; diff --git a/src/librbd/Utils.h b/src/librbd/Utils.h index dcdad1d6fbb6b..46b9401ecb07e 100644 --- a/src/librbd/Utils.h +++ b/src/librbd/Utils.h @@ -154,6 +154,7 @@ Context *create_async_context_callback(I &image_ctx, Context *on_finish) { image_ctx.op_work_queue, on_finish); } +// TODO: temporary until AioCompletion supports templated ImageCtx inline ImageCtx *get_image_ctx(ImageCtx *image_ctx) { return image_ctx; } diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index af000b7464ba1..05a9a31a68b79 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -3814,13 +3814,6 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force, return 0; } - void rbd_req_cb(completion_t cb, void *arg) - { - AioObjectRequest *req = reinterpret_cast(arg); - AioCompletion *comp = reinterpret_cast(cb); - req->complete(comp->get_return_value()); - } - struct C_RBD_Readahead : public Context { ImageCtx *ictx; object_t oid; diff --git a/src/librbd/journal/Policy.h b/src/librbd/journal/Policy.h index 826562228a021..2ef21e6fe0e8a 100644 --- a/src/librbd/journal/Policy.h +++ b/src/librbd/journal/Policy.h @@ -14,8 +14,8 @@ struct Policy { virtual ~Policy() { } + virtual bool append_disabled() const = 0; virtual void allocate_tag_on_lock(Context *on_finish) = 0; - virtual void cancel_external_replay(Context *on_finish) = 0; }; } // namespace journal diff --git a/src/librbd/journal/StandardPolicy.cc b/src/librbd/journal/StandardPolicy.cc index 9e718288706e8..5cba7c0201126 100644 --- a/src/librbd/journal/StandardPolicy.cc +++ b/src/librbd/journal/StandardPolicy.cc @@ -25,10 +25,5 @@ void StandardPolicy::allocate_tag_on_lock(Context *on_finish) { m_image_ctx->journal->allocate_local_tag(on_finish); } -void StandardPolicy::cancel_external_replay(Context *on_finish) { - // external replay is only handled by rbd-mirror - assert(false); -} - } // namespace journal } // namespace librbd diff --git a/src/librbd/journal/StandardPolicy.h b/src/librbd/journal/StandardPolicy.h index c49ec9cb46730..c2c997c8aecc3 100644 --- a/src/librbd/journal/StandardPolicy.h +++ b/src/librbd/journal/StandardPolicy.h @@ -17,8 +17,10 @@ class StandardPolicy : public Policy { StandardPolicy(ImageCtx *image_ctx) : m_image_ctx(image_ctx) { } + virtual bool append_disabled() const { + return false; + } virtual void allocate_tag_on_lock(Context *on_finish); - virtual void cancel_external_replay(Context *on_finish); private: ImageCtx *m_image_ctx; diff --git a/src/librbd/operation/FlattenRequest.cc b/src/librbd/operation/FlattenRequest.cc index 85433aeaa3643..8cfddbeac74dc 100644 --- a/src/librbd/operation/FlattenRequest.cc +++ b/src/librbd/operation/FlattenRequest.cc @@ -42,7 +42,7 @@ class C_FlattenObject : public C_AsyncObjectThrottle { bufferlist bl; string oid = image_ctx.get_object_name(m_object_no); AioObjectWrite *req = new AioObjectWrite(&image_ctx, oid, m_object_no, 0, - bl, m_snapc, this); + bl, m_snapc, this, 0); if (!req->has_parent()) { // stop early if the parent went away - it just means // another flatten finished first or the image was resized diff --git a/src/librbd/operation/Request.cc b/src/librbd/operation/Request.cc index f1ad960d0c444..7534004edc314 100644 --- a/src/librbd/operation/Request.cc +++ b/src/librbd/operation/Request.cc @@ -76,8 +76,8 @@ bool Request::append_op_event() { assert(image_ctx.owner_lock.is_locked()); RWLock::RLocker snap_locker(image_ctx.snap_lock); - if (image_ctx.journal != NULL && - !image_ctx.journal->is_journal_replaying()) { + if (image_ctx.journal != nullptr && + image_ctx.journal->is_journal_appending()) { append_op_event(util::create_context_callback< Request, &Request::handle_op_event_safe>(this)); return true; @@ -98,8 +98,8 @@ bool Request::commit_op_event(int r) { assert(!m_committed_op_event); m_committed_op_event = true; - if (image_ctx.journal != NULL && - !image_ctx.journal->is_journal_replaying()) { + if (image_ctx.journal != nullptr && + image_ctx.journal->is_journal_appending()) { CephContext *cct = image_ctx.cct; ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; diff --git a/src/librbd/operation/Request.h b/src/librbd/operation/Request.h index 6a09cb1d63921..78993160bf636 100644 --- a/src/librbd/operation/Request.h +++ b/src/librbd/operation/Request.h @@ -40,14 +40,16 @@ class Request : public AsyncRequest { assert(can_affect_io()); RWLock::RLocker owner_locker(image_ctx.owner_lock); RWLock::RLocker snap_locker(image_ctx.snap_lock); - if (image_ctx.journal != NULL) { - Context *ctx = util::create_context_callback(request); + if (image_ctx.journal != nullptr) { if (image_ctx.journal->is_journal_replaying()) { + Context *ctx = util::create_context_callback(request); replay_op_ready(ctx); - } else { + return true; + } else if (image_ctx.journal->is_journal_appending()) { + Context *ctx = util::create_context_callback(request); append_op_event(ctx); + return true; } - return true; } return false; } diff --git a/src/librbd/operation/TrimRequest.cc b/src/librbd/operation/TrimRequest.cc index 58266a4c28fd7..3992fb75e21c2 100644 --- a/src/librbd/operation/TrimRequest.cc +++ b/src/librbd/operation/TrimRequest.cc @@ -45,8 +45,8 @@ class C_CopyupObject : public C_AsyncObjectThrottle { string oid = image_ctx.get_object_name(m_object_no); ldout(image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl; - AioObjectRequest *req = new AioObjectTrim(&image_ctx, oid, m_object_no, - m_snapc, this); + AioObjectRequest<> *req = new AioObjectTrim(&image_ctx, oid, m_object_no, + m_snapc, this); req->send(); return 0; } @@ -361,7 +361,7 @@ void TrimRequest::send_clean_boundary() { ldout(cct, 20) << " ex " << *p << dendl; Context *req_comp = new C_ContextCompletion(*completion); - AioObjectRequest *req; + AioObjectRequest<> *req; if (p->offset == 0) { req = new AioObjectTrim(&image_ctx, p->oid.name, p->objectno, snapc, req_comp); diff --git a/src/test/librbd/CMakeLists.txt b/src/test/librbd/CMakeLists.txt index ad2cf9488ce24..edd3ed679d82e 100644 --- a/src/test/librbd/CMakeLists.txt +++ b/src/test/librbd/CMakeLists.txt @@ -22,30 +22,32 @@ set_target_properties(rbd_test_mock PROPERTIES COMPILE_FLAGS # unittest_librbd # doesn't use add_ceph_test because it is called by run-rbd-unit-tests.sh set(unittest_librbd_srcs - test_ConsistencyGroups.cc + test_ConsistencyGroups.cc test_main.cc - test_mock_fixture.cc - test_mock_ExclusiveLock.cc - test_mock_Journal.cc - test_mock_ObjectWatcher.cc - exclusive_lock/test_mock_AcquireRequest.cc - exclusive_lock/test_mock_ReleaseRequest.cc + test_mock_fixture.cc + test_mock_AioImageRequest.cc + test_mock_ExclusiveLock.cc + test_mock_Journal.cc + test_mock_ObjectWatcher.cc + exclusive_lock/test_mock_AcquireRequest.cc + exclusive_lock/test_mock_ReleaseRequest.cc image/test_mock_RefreshRequest.cc - journal/test_mock_Replay.cc - object_map/test_mock_InvalidateRequest.cc - object_map/test_mock_LockRequest.cc - object_map/test_mock_RefreshRequest.cc - object_map/test_mock_ResizeRequest.cc - object_map/test_mock_SnapshotCreateRequest.cc - object_map/test_mock_SnapshotRemoveRequest.cc - object_map/test_mock_SnapshotRollbackRequest.cc - object_map/test_mock_UnlockRequest.cc - object_map/test_mock_UpdateRequest.cc - operation/test_mock_ResizeRequest.cc - operation/test_mock_SnapshotCreateRequest.cc - operation/test_mock_SnapshotProtectRequest.cc - operation/test_mock_SnapshotRemoveRequest.cc - operation/test_mock_SnapshotRollbackRequest.cc + journal/test_mock_Replay.cc + object_map/test_mock_InvalidateRequest.cc + object_map/test_mock_LockRequest.cc + object_map/test_mock_RefreshRequest.cc + object_map/test_mock_ResizeRequest.cc + object_map/test_mock_SnapshotCreateRequest.cc + object_map/test_mock_SnapshotRemoveRequest.cc + object_map/test_mock_SnapshotRollbackRequest.cc + object_map/test_mock_UnlockRequest.cc + object_map/test_mock_UpdateRequest.cc + operation/test_mock_Request.cc + operation/test_mock_ResizeRequest.cc + operation/test_mock_SnapshotCreateRequest.cc + operation/test_mock_SnapshotProtectRequest.cc + operation/test_mock_SnapshotRemoveRequest.cc + operation/test_mock_SnapshotRollbackRequest.cc operation/test_mock_SnapshotUnprotectRequest.cc ) add_executable(unittest_librbd diff --git a/src/test/librbd/mock/MockImageCtx.h b/src/test/librbd/mock/MockImageCtx.h index b7b73c835fa6f..2818eb566dacf 100644 --- a/src/test/librbd/mock/MockImageCtx.h +++ b/src/test/librbd/mock/MockImageCtx.h @@ -39,6 +39,7 @@ struct MockImageCtx { MockImageCtx(librbd::ImageCtx &image_ctx) : image_ctx(&image_ctx), cct(image_ctx.cct), + perfcounter(image_ctx.perfcounter), snap_name(image_ctx.snap_name), snap_id(image_ctx.snap_id), snap_exists(image_ctx.snap_exists), @@ -47,18 +48,19 @@ struct MockImageCtx { snap_info(image_ctx.snap_info), snap_ids(image_ctx.snap_ids), object_cacher(image_ctx.object_cacher), + object_set(image_ctx.object_set), old_format(image_ctx.old_format), read_only(image_ctx.read_only), lockers(image_ctx.lockers), exclusive_locked(image_ctx.exclusive_locked), lock_tag(image_ctx.lock_tag), - owner_lock("owner_lock"), - md_lock("md_lock"), - cache_lock("cache_lock"), - snap_lock("snap_lock"), - parent_lock("parent_lock"), - object_map_lock("object_map_lock"), - async_ops_lock("async_ops_lock"), + owner_lock(image_ctx.owner_lock), + md_lock(image_ctx.md_lock), + cache_lock(image_ctx.cache_lock), + snap_lock(image_ctx.snap_lock), + parent_lock(image_ctx.parent_lock), + object_map_lock(image_ctx.object_map_lock), + async_ops_lock(image_ctx.async_ops_lock), order(image_ctx.order), size(image_ctx.size), features(image_ctx.features), @@ -70,9 +72,11 @@ struct MockImageCtx { id(image_ctx.id), name(image_ctx.name), parent_md(image_ctx.parent_md), + format_string(image_ctx.format_string), layout(image_ctx.layout), aio_work_queue(new MockAioImageRequestWQ()), op_work_queue(new MockContextWQ()), + readahead_max_bytes(image_ctx.readahead_max_bytes), parent(NULL), operations(new MockOperations()), state(new MockImageState()), image_watcher(NULL), object_map(NULL), @@ -145,6 +149,7 @@ struct MockImageCtx { MOCK_METHOD2(rm_snap, void(std::string in_snap_name, librados::snap_t id)); MOCK_METHOD1(flush, void(Context *)); + MOCK_METHOD1(flush_async_operations, void(Context *)); MOCK_METHOD1(flush_copyup, void(Context *)); MOCK_METHOD1(invalidate_cache, void(Context *)); @@ -165,8 +170,14 @@ struct MockImageCtx { MOCK_CONST_METHOD0(get_journal_policy, journal::Policy*()); + MOCK_METHOD7(aio_read_from_cache, void(object_t, uint64_t, bufferlist *, + size_t, uint64_t, Context *, int)); + MOCK_METHOD7(write_to_cache, void(object_t, const bufferlist&, size_t, + uint64_t, Context *, int, uint64_t)); + ImageCtx *image_ctx; CephContext *cct; + PerfCounters *perfcounter; std::string snap_name; uint64_t snap_id; @@ -178,6 +189,7 @@ struct MockImageCtx { std::map snap_ids; ObjectCacher *object_cacher; + ObjectCacher::ObjectSet *object_set; bool old_format; bool read_only; @@ -190,13 +202,13 @@ struct MockImageCtx { librados::IoCtx md_ctx; librados::IoCtx data_ctx; - RWLock owner_lock; - RWLock md_lock; - Mutex cache_lock; - RWLock snap_lock; - RWLock parent_lock; - RWLock object_map_lock; - Mutex async_ops_lock; + RWLock &owner_lock; + RWLock &md_lock; + Mutex &cache_lock; + RWLock &snap_lock; + RWLock &parent_lock; + RWLock &object_map_lock; + Mutex &async_ops_lock; uint8_t order; uint64_t size; @@ -209,6 +221,7 @@ struct MockImageCtx { std::string id; std::string name; parent_info parent_md; + char *format_string; file_layout_t layout; @@ -221,6 +234,7 @@ struct MockImageCtx { MockContextWQ *op_work_queue; MockReadahead readahead; + uint64_t readahead_max_bytes; MockImageCtx *parent; MockOperations *operations; diff --git a/src/test/librbd/mock/MockJournal.h b/src/test/librbd/mock/MockJournal.h index cfcb12c06ec15..48447c3b8b476 100644 --- a/src/test/librbd/mock/MockJournal.h +++ b/src/test/librbd/mock/MockJournal.h @@ -7,10 +7,16 @@ #include "gmock/gmock.h" #include "librbd/Journal.h" #include "librbd/journal/Types.h" +#include namespace librbd { +struct AioObjectRequestHandle; +struct ImageCtx; + struct MockJournal { + typedef std::list AioObjectRequests; + static MockJournal *s_instance; static MockJournal *get_instance() { assert(s_instance != nullptr); @@ -28,6 +34,7 @@ struct MockJournal { MOCK_CONST_METHOD0(is_journal_ready, bool()); MOCK_CONST_METHOD0(is_journal_replaying, bool()); + MOCK_CONST_METHOD0(is_journal_appending, bool()); MOCK_METHOD1(wait_for_journal_ready, void(Context *)); @@ -47,6 +54,21 @@ struct MockJournal { MOCK_METHOD0(allocate_op_tid, uint64_t()); + MOCK_METHOD5(append_write_event, uint64_t(uint64_t, size_t, + const bufferlist &, + const AioObjectRequests &, bool)); + MOCK_METHOD5(append_io_event_mock, uint64_t(const journal::EventEntry&, + const AioObjectRequests &, + uint64_t, size_t, bool)); + uint64_t append_io_event(journal::EventEntry &&event_entry, + const AioObjectRequests &requests, + uint64_t offset, size_t length, + bool flush_entry) { + // googlemock doesn't support move semantics + return append_io_event_mock(event_entry, requests, offset, length, + flush_entry); + } + MOCK_METHOD3(append_op_event_mock, void(uint64_t, const journal::EventEntry&, Context *)); void append_op_event(uint64_t op_tid, journal::EventEntry &&event_entry, @@ -55,6 +77,9 @@ struct MockJournal { append_op_event_mock(op_tid, event_entry, on_safe); } + MOCK_METHOD2(flush_event, void(uint64_t, Context *)); + MOCK_METHOD2(wait_event, void(uint64_t, Context *)); + MOCK_METHOD3(commit_op_event, void(uint64_t, int, Context *)); MOCK_METHOD2(replay_op_ready, void(uint64_t, Context *)); diff --git a/src/test/librbd/mock/MockJournalPolicy.h b/src/test/librbd/mock/MockJournalPolicy.h index e7debfaf535cc..8ad6ff60952ee 100644 --- a/src/test/librbd/mock/MockJournalPolicy.h +++ b/src/test/librbd/mock/MockJournalPolicy.h @@ -11,8 +11,8 @@ namespace librbd { struct MockJournalPolicy : public journal::Policy { + MOCK_CONST_METHOD0(append_disabled, bool()); MOCK_METHOD1(allocate_tag_on_lock, void(Context*)); - MOCK_METHOD1(cancel_external_replay, void(Context*)); }; diff --git a/src/test/librbd/operation/test_mock_Request.cc b/src/test/librbd/operation/test_mock_Request.cc new file mode 100644 index 0000000000000..397ffd8eba9c3 --- /dev/null +++ b/src/test/librbd/operation/test_mock_Request.cc @@ -0,0 +1,176 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librbd/test_mock_fixture.h" +#include "test/librbd/test_support.h" +#include "test/librbd/mock/MockImageCtx.h" +#include "test/librbd/mock/MockJournal.h" +#include "librbd/AsyncRequest.h" +#include "librbd/operation/Request.h" + +namespace librbd { +namespace { + +struct MockTestImageCtx : public MockImageCtx { + MockTestImageCtx(ImageCtx &image_ctx) : MockImageCtx(image_ctx) { + } +}; + +} // anonymous namespace + +template <> +struct AsyncRequest { + librbd::MockTestImageCtx &m_image_ctx; + Context *m_on_finish; + + AsyncRequest(librbd::MockTestImageCtx &image_ctx, Context *on_finish) + : m_image_ctx(image_ctx), m_on_finish(on_finish) { + } + virtual ~AsyncRequest() { + } + + virtual void finish(int r) { + m_on_finish->complete(r); + } + virtual void finish_and_destroy(int r) { + finish(r); + delete this; + } +}; + +} // namespace librbd + +#include "librbd/operation/Request.cc" +template class librbd::operation::Request; + +namespace librbd { +namespace journal { + +std::ostream& operator<<(std::ostream& os, const Event&) { + return os; +} + +} // namespace journal + +namespace operation { + +using ::testing::InSequence; +using ::testing::Invoke; +using ::testing::Return; + +struct MockRequest : public Request { + MockRequest(librbd::MockTestImageCtx &image_ctx, Context *on_finish, + uint64_t journal_op_tid) + : Request(image_ctx, on_finish, journal_op_tid) { + } + + void complete(int r) { + finish_and_destroy(r); + } + + void send_op_impl(int r) { + bool appending = append_op_event< + MockRequest, &MockRequest::handle_send>(this); + if (!appending) { + complete(r); + } + } + MOCK_METHOD1(should_complete, bool(int)); + MOCK_METHOD0(send_op, void()); + MOCK_METHOD1(handle_send, Context*(int*)); + MOCK_CONST_METHOD0(can_affect_io, bool()); + MOCK_CONST_METHOD1(create_event, journal::Event(uint64_t)); +}; + +struct TestMockOperationRequest : public TestMockFixture { + void expect_can_affect_io(MockRequest &mock_request, bool can_affect) { + EXPECT_CALL(mock_request, can_affect_io()) + .WillOnce(Return(can_affect)); + } + + void expect_is_journal_replaying(MockJournal &mock_journal, bool replaying) { + EXPECT_CALL(mock_journal, is_journal_replaying()) + .WillOnce(Return(replaying)); + } + + void expect_is_journal_appending(MockJournal &mock_journal, bool appending) { + EXPECT_CALL(mock_journal, is_journal_appending()) + .WillOnce(Return(appending)); + } + + void expect_send_op(MockRequest &mock_request, int r) { + EXPECT_CALL(mock_request, send_op()) + .WillOnce(Invoke([&mock_request, r]() { + mock_request.complete(r); + })); + } + + void expect_send_op_affects_io(MockImageCtx &mock_image_ctx, + MockRequest &mock_request, int r) { + EXPECT_CALL(mock_request, send_op()) + .WillOnce(Invoke([&mock_image_ctx, &mock_request, r]() { + mock_image_ctx.image_ctx->op_work_queue->queue( + new FunctionContext([&mock_request, r](int _) { + mock_request.send_op_impl(r); + }), 0); + })); + } + +}; + +TEST_F(TestMockOperationRequest, SendJournalDisabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockTestImageCtx mock_image_ctx(*ictx); + MockJournal mock_journal; + mock_image_ctx.journal = &mock_journal; + + C_SaferCond ctx; + MockRequest *mock_request = new MockRequest(mock_image_ctx, &ctx, 0); + + InSequence seq; + expect_can_affect_io(*mock_request, false); + expect_is_journal_appending(mock_journal, false); + expect_send_op(*mock_request, 0); + + { + RWLock::RLocker owner_locker(mock_image_ctx.owner_lock); + mock_request->send(); + } + + ASSERT_EQ(0, ctx.wait()); +} + +TEST_F(TestMockOperationRequest, SendAffectsIOJournalDisabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockTestImageCtx mock_image_ctx(*ictx); + MockJournal mock_journal; + mock_image_ctx.journal = &mock_journal; + + C_SaferCond ctx; + MockRequest *mock_request = new MockRequest(mock_image_ctx, &ctx, 0); + + InSequence seq; + expect_can_affect_io(*mock_request, true); + expect_send_op_affects_io(mock_image_ctx, *mock_request, 0); + expect_can_affect_io(*mock_request, true); + expect_is_journal_replaying(mock_journal, false); + expect_is_journal_appending(mock_journal, false); + + { + RWLock::RLocker owner_locker(mock_image_ctx.owner_lock); + mock_request->send(); + } + + ASSERT_EQ(0, ctx.wait()); +} + +} // namespace operation +} // namespace librbd diff --git a/src/test/librbd/operation/test_mock_ResizeRequest.cc b/src/test/librbd/operation/test_mock_ResizeRequest.cc index 86bde5061e429..b7cf365eae019 100644 --- a/src/test/librbd/operation/test_mock_ResizeRequest.cc +++ b/src/test/librbd/operation/test_mock_ResizeRequest.cc @@ -157,7 +157,7 @@ TEST_F(TestMockOperationResizeRequest, NoOpSuccess) { InSequence seq; expect_block_writes(mock_image_ctx, 0); - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, true, 0); expect_unblock_writes(mock_image_ctx); expect_commit_op_event(mock_image_ctx, 0); ASSERT_EQ(0, when_resize(mock_image_ctx, ictx->size, true, 0, false)); @@ -176,7 +176,7 @@ TEST_F(TestMockOperationResizeRequest, GrowSuccess) { InSequence seq; expect_block_writes(mock_image_ctx, 0); - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, true, 0); expect_unblock_writes(mock_image_ctx); expect_grow_object_map(mock_image_ctx); expect_block_writes(mock_image_ctx, 0); @@ -199,7 +199,7 @@ TEST_F(TestMockOperationResizeRequest, ShrinkSuccess) { InSequence seq; expect_block_writes(mock_image_ctx, 0); - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, true, 0); expect_unblock_writes(mock_image_ctx); MockTrimRequest mock_trim_request; @@ -260,7 +260,7 @@ TEST_F(TestMockOperationResizeRequest, TrimError) { InSequence seq; expect_block_writes(mock_image_ctx, 0); - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, true, 0); expect_unblock_writes(mock_image_ctx); MockTrimRequest mock_trim_request; @@ -282,7 +282,7 @@ TEST_F(TestMockOperationResizeRequest, InvalidateCacheError) { InSequence seq; expect_block_writes(mock_image_ctx, 0); - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, true, 0); expect_unblock_writes(mock_image_ctx); MockTrimRequest mock_trim_request; @@ -305,7 +305,7 @@ TEST_F(TestMockOperationResizeRequest, PostBlockWritesError) { InSequence seq; expect_block_writes(mock_image_ctx, 0); - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, true, 0); expect_unblock_writes(mock_image_ctx); expect_grow_object_map(mock_image_ctx); expect_block_writes(mock_image_ctx, -EINVAL); @@ -327,7 +327,7 @@ TEST_F(TestMockOperationResizeRequest, UpdateHeaderError) { InSequence seq; expect_block_writes(mock_image_ctx, 0); - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, true, 0); expect_unblock_writes(mock_image_ctx); expect_grow_object_map(mock_image_ctx); expect_block_writes(mock_image_ctx, 0); @@ -352,7 +352,7 @@ TEST_F(TestMockOperationResizeRequest, JournalAppendError) { InSequence seq; expect_block_writes(mock_image_ctx, 0); - expect_append_op_event(mock_image_ctx, -EINVAL); + expect_append_op_event(mock_image_ctx, true, -EINVAL); expect_unblock_writes(mock_image_ctx); ASSERT_EQ(-EINVAL, when_resize(mock_image_ctx, ictx->size, true, 0, false)); } diff --git a/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc b/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc index a1bfb6629782c..addb32b279b26 100644 --- a/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc +++ b/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc @@ -199,7 +199,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, Success) { InSequence seq; MockResizeRequest mock_resize_request; - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, false, 0); expect_block_writes(mock_image_ctx, 0); expect_resize(mock_image_ctx, mock_resize_request, 0); expect_rollback_object_map(mock_image_ctx, *mock_object_map); @@ -224,7 +224,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, BlockWritesError) { expect_op_work_queue(mock_image_ctx); InSequence seq; - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, false, 0); expect_block_writes(mock_image_ctx, -EINVAL); expect_commit_op_event(mock_image_ctx, -EINVAL); expect_unblock_writes(mock_image_ctx); @@ -244,7 +244,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, SkipResize) { expect_op_work_queue(mock_image_ctx); InSequence seq; - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, false, 0); expect_block_writes(mock_image_ctx, 0); expect_get_image_size(mock_image_ctx, 345); expect_rollback_object_map(mock_image_ctx, *mock_object_map); @@ -270,7 +270,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, ResizeError) { InSequence seq; MockResizeRequest mock_resize_request; - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, false, 0); expect_block_writes(mock_image_ctx, 0); expect_resize(mock_image_ctx, mock_resize_request, -EINVAL); expect_commit_op_event(mock_image_ctx, -EINVAL); @@ -292,7 +292,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, RollbackObjectsError) { InSequence seq; MockResizeRequest mock_resize_request; - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, false, 0); expect_block_writes(mock_image_ctx, 0); expect_resize(mock_image_ctx, mock_resize_request, 0); expect_rollback_object_map(mock_image_ctx, mock_object_map); @@ -316,7 +316,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, InvalidateCacheError) { InSequence seq; MockResizeRequest mock_resize_request; - expect_append_op_event(mock_image_ctx, 0); + expect_append_op_event(mock_image_ctx, false, 0); expect_block_writes(mock_image_ctx, 0); expect_resize(mock_image_ctx, mock_resize_request, 0); expect_rollback_object_map(mock_image_ctx, *mock_object_map); diff --git a/src/test/librbd/test_mock_AioImageRequest.cc b/src/test/librbd/test_mock_AioImageRequest.cc new file mode 100644 index 0000000000000..2cd6b162a4408 --- /dev/null +++ b/src/test/librbd/test_mock_AioImageRequest.cc @@ -0,0 +1,257 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librbd/test_mock_fixture.h" +#include "test/librbd/test_support.h" +#include "test/librbd/mock/MockImageCtx.h" +#include "test/librbd/mock/MockJournal.h" +#include "librbd/AioImageRequest.h" +#include "librbd/AioObjectRequest.h" + +namespace librbd { +namespace { + +struct MockTestImageCtx : public MockImageCtx { + MockTestImageCtx(ImageCtx &image_ctx) : MockImageCtx(image_ctx) { + } +}; + +} // anonymous namespace + +namespace util { + +inline ImageCtx *get_image_ctx(MockTestImageCtx *image_ctx) { + return image_ctx->image_ctx; +} + +} // namespace util + +template <> +struct AioObjectRequest : public AioObjectRequestHandle { + static AioObjectRequest* s_instance; + Context *on_finish = nullptr; + + static AioObjectRequest* create_remove(librbd::MockTestImageCtx *ictx, + const std::string &oid, + uint64_t object_no, + const ::SnapContext &snapc, + Context *completion) { + assert(s_instance != nullptr); + s_instance->on_finish = completion; + return s_instance; + } + + static AioObjectRequest* create_truncate(librbd::MockTestImageCtx *ictx, + const std::string &oid, + uint64_t object_no, + uint64_t object_off, + const ::SnapContext &snapc, + Context *completion) { + assert(s_instance != nullptr); + s_instance->on_finish = completion; + return s_instance; + } + + static AioObjectRequest* create_write(librbd::MockTestImageCtx *ictx, + const std::string &oid, + uint64_t object_no, + uint64_t object_off, + const ceph::bufferlist &data, + const ::SnapContext &snapc, + Context *completion, int op_flags) { + assert(s_instance != nullptr); + s_instance->on_finish = completion; + return s_instance; + } + + static AioObjectRequest* create_zero(librbd::MockTestImageCtx *ictx, + const std::string &oid, + uint64_t object_no, uint64_t object_off, + uint64_t object_len, + const ::SnapContext &snapc, + Context *completion) { + assert(s_instance != nullptr); + s_instance->on_finish = completion; + return s_instance; + } + + AioObjectRequest() { + assert(s_instance == nullptr); + s_instance = this; + } + ~AioObjectRequest() { + s_instance = nullptr; + } + + MOCK_METHOD1(complete, void(int)); + MOCK_METHOD0(send, void()); +}; + +template <> +struct AioObjectRead : public AioObjectRequest { + typedef std::vector > Extents; + typedef std::map ExtentMap; + + static AioObjectRead* s_instance; + + static AioObjectRead* create(librbd::MockTestImageCtx *ictx, + const std::string &oid, + uint64_t objectno, uint64_t offset, + uint64_t len, Extents &buffer_extents, + librados::snap_t snap_id, bool sparse, + Context *completion, int op_flags) { + assert(s_instance != nullptr); + s_instance->on_finish = completion; + return s_instance; + } + + AioObjectRead() { + assert(s_instance == nullptr); + s_instance = this; + } + ~AioObjectRead() { + s_instance = nullptr; + } + + MOCK_CONST_METHOD0(get_offset, uint64_t()); + MOCK_CONST_METHOD0(get_length, uint64_t()); + MOCK_METHOD0(data, ceph::bufferlist &()); + MOCK_CONST_METHOD0(get_buffer_extents, const Extents &()); + MOCK_METHOD0(get_extent_map, ExtentMap &()); + +}; + +AioObjectRequest* AioObjectRequest::s_instance = nullptr; +AioObjectRead* AioObjectRead::s_instance = nullptr; + +} // namespace librbd + +#include "librbd/AioImageRequest.cc" +template class librbd::AioImageRequest; + +namespace librbd { + +using ::testing::_; +using ::testing::InSequence; +using ::testing::Invoke; +using ::testing::Return; +using ::testing::WithArg; + +struct TestMockAioImageRequest : public TestMockFixture { + typedef AioImageRequest MockAioImageRequest; + typedef AioImageWrite MockAioImageWrite; + typedef AioImageDiscard MockAioImageDiscard; + typedef AioImageFlush MockAioImageFlush; + typedef AioObjectRequest MockAioObjectRequest; + typedef AioObjectRead MockAioObjectRead; + + void expect_is_journal_appending(MockJournal &mock_journal, bool appending) { + EXPECT_CALL(mock_journal, is_journal_appending()) + .WillOnce(Return(appending)); + } + + void expect_write_to_cache(MockImageCtx &mock_image_ctx, + const object_t &object, + uint64_t offset, uint64_t length, + uint64_t journal_tid, int r) { + EXPECT_CALL(mock_image_ctx, write_to_cache(object, _, length, offset, _, _, + journal_tid)) + .WillOnce(WithArg<4>(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue))); + } + + void expect_object_request_send(MockImageCtx &mock_image_ctx, + MockAioObjectRequest &mock_object_request, + int r) { + EXPECT_CALL(mock_object_request, send()) + .WillOnce(Invoke([&mock_image_ctx, &mock_object_request, r]() { + mock_image_ctx.image_ctx->op_work_queue->queue( + mock_object_request.on_finish, r); + })); + } + + void expect_flush(MockImageCtx &mock_image_ctx, int r) { + EXPECT_CALL(mock_image_ctx, flush(_)) + .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)); + } +}; + +TEST_F(TestMockAioImageRequest, AioWriteJournalAppendDisabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockAioObjectRequest mock_aio_object_request; + MockTestImageCtx mock_image_ctx(*ictx); + MockJournal mock_journal; + mock_image_ctx.journal = &mock_journal; + + InSequence seq; + expect_is_journal_appending(mock_journal, false); + expect_write_to_cache(mock_image_ctx, ictx->get_object_name(0), + 0, 1, 0, 0); + + C_SaferCond aio_comp_ctx; + AioCompletion *aio_comp = AioCompletion::create_and_start( + &aio_comp_ctx, ictx, AIO_TYPE_WRITE); + MockAioImageWrite mock_aio_image_write(mock_image_ctx, aio_comp, 0, 1, "1", + 0); + { + RWLock::RLocker owner_locker(mock_image_ctx.owner_lock); + mock_aio_image_write.send(); + } + ASSERT_EQ(0, aio_comp_ctx.wait()); +} + +TEST_F(TestMockAioImageRequest, AioDiscardJournalAppendDisabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockAioObjectRequest mock_aio_object_request; + MockTestImageCtx mock_image_ctx(*ictx); + MockJournal mock_journal; + mock_image_ctx.journal = &mock_journal; + + InSequence seq; + expect_is_journal_appending(mock_journal, false); + expect_object_request_send(mock_image_ctx, mock_aio_object_request, 0); + + C_SaferCond aio_comp_ctx; + AioCompletion *aio_comp = AioCompletion::create_and_start( + &aio_comp_ctx, ictx, AIO_TYPE_DISCARD); + MockAioImageDiscard mock_aio_image_discard(mock_image_ctx, aio_comp, 0, 1); + { + RWLock::RLocker owner_locker(mock_image_ctx.owner_lock); + mock_aio_image_discard.send(); + } + ASSERT_EQ(0, aio_comp_ctx.wait()); +} + +TEST_F(TestMockAioImageRequest, AioFlushJournalAppendDisabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockTestImageCtx mock_image_ctx(*ictx); + MockJournal mock_journal; + mock_image_ctx.journal = &mock_journal; + + InSequence seq; + expect_is_journal_appending(mock_journal, false); + expect_flush(mock_image_ctx, 0); + + C_SaferCond aio_comp_ctx; + AioCompletion *aio_comp = AioCompletion::create_and_start( + &aio_comp_ctx, ictx, AIO_TYPE_FLUSH); + MockAioImageFlush mock_aio_image_flush(mock_image_ctx, aio_comp); + { + RWLock::RLocker owner_locker(mock_image_ctx.owner_lock); + mock_aio_image_flush.send(); + } + ASSERT_EQ(0, aio_comp_ctx.wait()); +} + +} // namespace librbd diff --git a/src/test/librbd/test_mock_Journal.cc b/src/test/librbd/test_mock_Journal.cc index 77adac0738c24..3785f2ce783e2 100644 --- a/src/test/librbd/test_mock_Journal.cc +++ b/src/test/librbd/test_mock_Journal.cc @@ -5,6 +5,7 @@ #include "test/journal/mock/MockJournaler.h" #include "test/librbd/test_support.h" #include "test/librbd/mock/MockImageCtx.h" +#include "test/librbd/mock/MockJournalPolicy.h" #include "common/Cond.h" #include "common/Mutex.h" #include "cls/journal/cls_journal_types.h" @@ -158,6 +159,7 @@ class TestMockJournal : public TestMockFixture { } void expect_shut_down_journaler(::journal::MockJournaler &mock_journaler) { + EXPECT_CALL(mock_journaler, remove_listener(_)); EXPECT_CALL(mock_journaler, shut_down(_)) .WillOnce(CompleteContext(0, NULL)); } @@ -196,6 +198,7 @@ class TestMockJournal : public TestMockFixture { EXPECT_CALL(mock_journaler, get_tags(0, _, _)) .WillOnce(DoAll(SetArgPointee<1>(tags), WithArg<2>(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)))); + EXPECT_CALL(mock_journaler, add_listener(_)); } void expect_start_replay(MockJournalImageCtx &mock_image_ctx, @@ -305,7 +308,7 @@ class TestMockJournal : public TestMockFixture { uint64_t when_append_io_event(MockJournalImageCtx &mock_image_ctx, MockJournal &mock_journal, - AioObjectRequest *object_request = nullptr) { + AioObjectRequest<> *object_request = nullptr) { RWLock::RLocker owner_locker(mock_image_ctx.owner_lock); MockJournal::AioObjectRequests object_requests; if (object_request != nullptr) { @@ -714,6 +717,8 @@ TEST_F(TestMockJournal, ReplayOnDiskPreFlushError) { MockJournalReplay mock_journal_replay; expect_try_pop_front(mock_journaler, true, mock_replay_entry); + EXPECT_CALL(mock_journal_replay, decode(_, _)) + .WillOnce(Return(0)); Context *on_ready; EXPECT_CALL(mock_journal_replay, process(_, _, _)) .WillOnce(DoAll(SaveArg<1>(&on_ready), @@ -1151,5 +1156,34 @@ TEST_F(TestMockJournal, ExternalReplayCloseRequest) { ASSERT_EQ(0, close_ctx.wait()); } +TEST_F(TestMockJournal, AppendDisabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockJournalImageCtx mock_image_ctx(*ictx); + MockJournal mock_journal(mock_image_ctx); + MockJournalPolicy mock_journal_policy; + + ::journal::MockJournaler mock_journaler; + open_journal(mock_image_ctx, mock_journal, mock_journaler); + BOOST_SCOPE_EXIT_ALL(&) { + close_journal(mock_journal, mock_journaler); + }; + + InSequence seq; + RWLock::RLocker snap_locker(mock_image_ctx.snap_lock); + EXPECT_CALL(mock_image_ctx, get_journal_policy()).WillOnce( + Return(ictx->get_journal_policy())); + ASSERT_TRUE(mock_journal.is_journal_appending()); + + EXPECT_CALL(mock_image_ctx, get_journal_policy()).WillOnce( + Return(&mock_journal_policy)); + EXPECT_CALL(mock_journal_policy, append_disabled()).WillOnce(Return(true)); + ASSERT_FALSE(mock_journal.is_journal_appending()); + + expect_shut_down_journaler(mock_journaler); +} } // namespace librbd diff --git a/src/test/librbd/test_mock_fixture.cc b/src/test/librbd/test_mock_fixture.cc index c2644eb534773..3fb246d28d37c 100644 --- a/src/test/librbd/test_mock_fixture.cc +++ b/src/test/librbd/test_mock_fixture.cc @@ -84,6 +84,11 @@ void TestMockFixture::initialize_features(librbd::ImageCtx *ictx, } } +void TestMockFixture::expect_is_journal_appending(librbd::MockJournal &mock_journal, + bool appending) { + EXPECT_CALL(mock_journal, is_journal_appending()).WillOnce(Return(appending)); +} + void TestMockFixture::expect_is_journal_replaying(librbd::MockJournal &mock_journal) { EXPECT_CALL(mock_journal, is_journal_replaying()).WillOnce(Return(false)); } @@ -99,9 +104,13 @@ void TestMockFixture::expect_allocate_op_tid(librbd::MockImageCtx &mock_image_ct } } -void TestMockFixture::expect_append_op_event(librbd::MockImageCtx &mock_image_ctx, int r) { +void TestMockFixture::expect_append_op_event(librbd::MockImageCtx &mock_image_ctx, + bool can_affect_io, int r) { if (mock_image_ctx.journal != nullptr) { - expect_is_journal_replaying(*mock_image_ctx.journal); + if (can_affect_io) { + expect_is_journal_replaying(*mock_image_ctx.journal); + } + expect_is_journal_appending(*mock_image_ctx.journal, true); expect_allocate_op_tid(mock_image_ctx); EXPECT_CALL(*mock_image_ctx.journal, append_op_event_mock(_, _, _)) .WillOnce(WithArg<2>(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue))); @@ -110,7 +119,7 @@ void TestMockFixture::expect_append_op_event(librbd::MockImageCtx &mock_image_ct void TestMockFixture::expect_commit_op_event(librbd::MockImageCtx &mock_image_ctx, int r) { if (mock_image_ctx.journal != nullptr) { - expect_is_journal_replaying(*mock_image_ctx.journal); + expect_is_journal_appending(*mock_image_ctx.journal, true); expect_is_journal_ready(*mock_image_ctx.journal); EXPECT_CALL(*mock_image_ctx.journal, commit_op_event(1U, r, _)) .WillOnce(WithArg<2>(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue))); diff --git a/src/test/librbd/test_mock_fixture.h b/src/test/librbd/test_mock_fixture.h index bd5a2ac84c686..b06ca5bf7209c 100644 --- a/src/test/librbd/test_mock_fixture.h +++ b/src/test/librbd/test_mock_fixture.h @@ -80,10 +80,13 @@ class TestMockFixture : public TestFixture { librbd::MockJournal &mock_journal, librbd::MockObjectMap &mock_object_map); + void expect_is_journal_appending(librbd::MockJournal &mock_journal, + bool appending); void expect_is_journal_replaying(librbd::MockJournal &mock_journal); void expect_is_journal_ready(librbd::MockJournal &mock_journal); void expect_allocate_op_tid(librbd::MockImageCtx &mock_image_ctx); - void expect_append_op_event(librbd::MockImageCtx &mock_image_ctx, int r); + void expect_append_op_event(librbd::MockImageCtx &mock_image_ctx, + bool can_affect_io, int r); void expect_commit_op_event(librbd::MockImageCtx &mock_image_ctx, int r); private: diff --git a/src/test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc b/src/test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc index d924a3cab2872..79e7fa85d9a7b 100644 --- a/src/test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc +++ b/src/test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc @@ -109,7 +109,7 @@ class TestMockImageSyncImageCopyRequest : public TestMockFixture { void expect_get_snap_id(librbd::MockTestImageCtx &mock_image_ctx) { EXPECT_CALL(mock_image_ctx, get_snap_id(_)) .WillRepeatedly(Invoke([&mock_image_ctx](std::string snap_name) { - RWLock::RLocker snap_locker(mock_image_ctx.image_ctx->snap_lock); + assert(mock_image_ctx.image_ctx->snap_lock.is_locked()); return mock_image_ctx.image_ctx->get_snap_id(snap_name); })); } diff --git a/src/test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc b/src/test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc index e27bcb6db1780..b018f16c21b30 100644 --- a/src/test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc +++ b/src/test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc @@ -179,8 +179,8 @@ class TestMockImageSyncObjectCopyRequest : public TestMockFixture { }))); } else { expect.WillOnce(WithArg<5>(Invoke([&mock_image_ctx, snap_id, state, r](Context *ctx) { - RWLock::RLocker snap_locker(mock_image_ctx.image_ctx->snap_lock); - RWLock::WLocker object_map_locker(mock_image_ctx.image_ctx->object_map_lock); + assert(mock_image_ctx.image_ctx->snap_lock.is_locked()); + assert(mock_image_ctx.image_ctx->object_map_lock.is_wlocked()); mock_image_ctx.image_ctx->object_map->aio_update(snap_id, 0, 1, state, boost::none, ctx); diff --git a/src/tools/rbd_mirror/ImageDeleter.cc b/src/tools/rbd_mirror/ImageDeleter.cc index e4ad55ae60f3e..234c2401ffde9 100644 --- a/src/tools/rbd_mirror/ImageDeleter.cc +++ b/src/tools/rbd_mirror/ImageDeleter.cc @@ -74,11 +74,11 @@ class StatusCommand : public ImageDeleterAdminSocketCommand { }; struct DeleteJournalPolicy : public librbd::journal::Policy { - virtual void allocate_tag_on_lock(Context *on_finish) { - on_finish->complete(0); + virtual bool append_disabled() const { + return true; } - virtual void cancel_external_replay(Context *on_finish) { + virtual void allocate_tag_on_lock(Context *on_finish) { on_finish->complete(0); } }; diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc index 63597ebf4312e..cbb42b929bdb3 100644 --- a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc +++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc @@ -44,17 +44,15 @@ struct MirrorJournalPolicy : public librbd::journal::Policy { MirrorJournalPolicy(ContextWQ *work_queue) : work_queue(work_queue) { } + virtual bool append_disabled() const { + // avoid recording any events to the local journal + return true; + } + virtual void allocate_tag_on_lock(Context *on_finish) { // rbd-mirror will manually create tags by copying them from the peer work_queue->queue(on_finish, 0); } - - virtual void cancel_external_replay(Context *on_finish) { - // TODO: journal is being closed due to a comms error. This means - // the journal is being closed and the exclusive lock is being released. - // ImageReplayer needs to restart. - } - }; } // anonymous namespace