librbd: batch ObjectMap updates upon trim
Shrinking a clone which has snapshots and does not share
the majority of its objects with its parent (i.e., there
are fewer objects to be copied up) involves a huge number
of object map updates -- two (pre, post) per object. This
results in many requests being sent to OSDs, especially
when trimming a gigantic image. This can be optimized by
sending batched ObjectMap updates for an object range,
significantly cutting down OSD traffic and resulting in
faster trim times.

Fixes: http://tracker.ceph.com/issues/17356
Signed-off-by: Venky Shankar <vshankar@redhat.com>
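
To illustrate the effect (an editor's sketch, not part of the commit), here is a toy model that reuses the range-based aio_update() call shape added in TrimRequest.cc and contrasts per-object pre/post updates with one batched update per range; all types and counts below are illustrative only.

#include <cstdint>
#include <iostream>

// Toy object map: each aio_update() call stands in for one request to the OSDs.
struct ToyObjectMap {
  int requests = 0;
  void aio_update(uint64_t /*start*/, uint64_t /*end*/,
                  uint8_t /*new_state*/, uint8_t /*current_state*/) {
    ++requests;  // a single request covers the whole [start, end) range
  }
};

int main() {
  const uint64_t num_objects = 1 << 20;  // objects trimmed from a large image
  ToyObjectMap per_object, batched;

  // before this commit: two updates (pre and post) per trimmed object
  for (uint64_t i = 0; i < num_objects; ++i) {
    per_object.aio_update(i, i + 1, 2 /*PENDING*/, 1 /*EXISTS*/);
    per_object.aio_update(i, i + 1, 0 /*NONEXISTENT*/, 2 /*PENDING*/);
  }

  // with this commit: two updates (pre and post) per trimmed object range
  batched.aio_update(0, num_objects, 2, 1);
  batched.aio_update(0, num_objects, 0, 2);

  std::cout << "per-object requests: " << per_object.requests << "\n"  // 2097152
            << "batched requests:    " << batched.requests << "\n";    // 2
}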
vshankar committed Oct 26, 2016
1 parent d712d20 commit f66e236
Showing 4 changed files with 149 additions and 48 deletions.
10 changes: 9 additions & 1 deletion src/librbd/AioObjectRequest.cc
@@ -446,9 +446,10 @@ void AbstractAioObjectWrite::send_pre() {
m_object_exist = m_ictx->object_map->object_may_exist(m_object_no);

uint8_t new_state;
pre_object_map_update(&new_state);

RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
pre_object_map_update(&new_state);

if (m_ictx->object_map->update_required(m_object_no, new_state)) {
ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " "
<< m_object_off << "~" << m_object_len
@@ -623,6 +624,13 @@ void AioObjectTruncate::send_write() {
}
}

void AioObjectTrim::pre_object_map_update(uint8_t *new_state) {
*new_state = OBJECT_PENDING;

// object map state (before pre) decides if a post update is needed
need_post = m_ictx->object_map->update_required(m_object_no, *new_state);
}

} // namespace librbd

template class librbd::AioObjectRequest<librbd::ImageCtx>;
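The relocation above also lets AioObjectTrim record, while object_map_lock is held, whether its pre update was actually required; post_object_map_update() then returns that flag so trims already covered by a batched range update skip their per-object post update. A minimal sketch of the idea, assuming update_required() is false when the object already holds the requested state (simplified types, not the librbd classes):

#include <cstdint>
#include <iostream>
#include <vector>

enum : uint8_t { OBJECT_NONEXISTENT = 0, OBJECT_EXISTS = 1, OBJECT_PENDING = 2 };

struct SketchObjectMap {
  std::vector<uint8_t> state;
  bool update_required(uint64_t object_no, uint8_t new_state) const {
    return state[object_no] != new_state;  // assumed simplification
  }
};

int main() {
  SketchObjectMap om{std::vector<uint8_t>(4, OBJECT_EXISTS)};

  // suppose the trim request batch-updated objects [1, 3) to OBJECT_PENDING
  om.state[1] = om.state[2] = OBJECT_PENDING;

  for (uint64_t object_no = 0; object_no < 4; ++object_no) {
    // mirrors AioObjectTrim::pre_object_map_update(): the state seen before
    // the pre update decides whether this request issues its own post update
    bool need_post = om.update_required(object_no, OBJECT_PENDING);
    std::cout << "object " << object_no << ": post update "
              << (need_post ? "needed" : "skipped, covered by the batch") << "\n";
  }
}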
11 changes: 6 additions & 5 deletions src/librbd/AioObjectRequest.h
@@ -333,7 +333,7 @@ class AioObjectTrim : public AbstractAioObjectWrite {
AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
const ::SnapContext &snapc, Context *completion)
: AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
true) {
true), need_post(true) {
}

protected:
@@ -345,13 +345,14 @@ class AioObjectTrim : public AbstractAioObjectWrite {
return "remove (trim)";
}

virtual void pre_object_map_update(uint8_t *new_state) {
*new_state = OBJECT_PENDING;
}
virtual void pre_object_map_update(uint8_t *new_state);

virtual bool post_object_map_update() {
return true;
return need_post;
}

private:
bool need_post;
};

class AioObjectTruncate : public AbstractAioObjectWrite {
146 changes: 113 additions & 33 deletions src/librbd/operation/TrimRequest.cc
@@ -131,8 +131,18 @@ bool TrimRequest<I>::should_complete(int r)

RWLock::RLocker owner_lock(image_ctx.owner_lock);
switch (m_state) {
case STATE_PRE_COPYUP:
ldout(cct, 5) << " PRE_COPYUP" << dendl;
send_copyup_objects();
break;

case STATE_COPYUP_OBJECTS:
ldout(cct, 5) << " COPYUP_OBJECTS" << dendl;
send_post_copyup();
break;

case STATE_POST_COPYUP:
ldout(cct, 5) << " POST_COPYUP" << dendl;
send_pre_remove();
break;

@@ -170,58 +180,33 @@ bool TrimRequest<I>::should_complete(int r)

template <typename I>
void TrimRequest<I>::send() {
send_copyup_objects();
send_pre_copyup();
}

template <typename I>
template<typename I>
void TrimRequest<I>::send_copyup_objects() {
I &image_ctx = this->m_image_ctx;
assert(image_ctx.owner_lock.is_locked());
assert(image_ctx.exclusive_lock == nullptr ||
image_ctx.exclusive_lock->is_lock_owner());

if (m_delete_start >= m_num_objects) {
send_clean_boundary();
return;
}
ldout(image_ctx.cct, 5) << this << " send_copyup_objects: "
<< " start object=" << m_copyup_start << ", "
<< " end object=" << m_copyup_end << dendl;
m_state = STATE_COPYUP_OBJECTS;

::SnapContext snapc;
bool has_snapshots;
uint64_t parent_overlap;
{
RWLock::RLocker snap_locker(image_ctx.snap_lock);
RWLock::RLocker parent_locker(image_ctx.parent_lock);

snapc = image_ctx.snapc;
has_snapshots = !image_ctx.snaps.empty();
int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap);
assert(r == 0);
}

// copyup is only required for portion of image that overlaps parent
uint64_t copyup_end = Striper::get_num_objects(image_ctx.layout,
parent_overlap);
// TODO: protect against concurrent shrink and snap create?
if (copyup_end <= m_delete_start || !has_snapshots) {
send_pre_remove();
return;
}

uint64_t copyup_start = m_delete_start;
m_delete_start = copyup_end;

ldout(image_ctx.cct, 5) << this << " send_copyup_objects: "
<< " start object=" << copyup_start << ", "
<< " end object=" << copyup_end << dendl;
m_state = STATE_COPYUP_OBJECTS;

Context *ctx = this->create_callback_context();
typename AsyncObjectThrottle<I>::ContextFactory context_factory(
boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject<I> >(),
boost::lambda::_1, &image_ctx, snapc, boost::lambda::_2));
AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
this, image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start,
copyup_end);
this, image_ctx, context_factory, ctx, &m_prog_ctx, m_copyup_start,
m_copyup_end);
throttle->start_ops(image_ctx.concurrent_management_ops);
}

@@ -245,6 +230,68 @@ void TrimRequest<I>::send_remove_objects() {
throttle->start_ops(image_ctx.concurrent_management_ops);
}

template<typename I>
void TrimRequest<I>::send_pre_copyup() {
I &image_ctx = this->m_image_ctx;
assert(image_ctx.owner_lock.is_locked());

if (m_delete_start >= m_num_objects) {
send_clean_boundary();
return;
}

bool has_snapshots;
uint64_t parent_overlap;
{
RWLock::RLocker snap_locker(image_ctx.snap_lock);
RWLock::RLocker parent_locker(image_ctx.parent_lock);

has_snapshots = !image_ctx.snaps.empty();
int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap);
assert(r == 0);
}

// copyup is only required for portion of image that overlaps parent
m_copyup_end = Striper::get_num_objects(image_ctx.layout, parent_overlap);

// TODO: protect against concurrent shrink and snap create?
// skip to remove if no copyup is required.
if (m_copyup_end <= m_delete_start || !has_snapshots) {
send_pre_remove();
return;
}

m_copyup_start = m_delete_start;
m_delete_start = m_copyup_end;

bool copyup_objects = false;
{
RWLock::RLocker snap_locker(image_ctx.snap_lock);
if (image_ctx.object_map == nullptr) {
copyup_objects = true;
} else {
ldout(image_ctx.cct, 5) << this << " send_pre_copyup: "
<< " copyup_start=" << m_copyup_start
<< " copyup_end=" << m_copyup_end << dendl;
m_state = STATE_PRE_COPYUP;

assert(image_ctx.exclusive_lock->is_lock_owner());

Context *ctx = this->create_callback_context();
RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
if (!image_ctx.object_map->aio_update(m_copyup_start, m_copyup_end,
OBJECT_PENDING, OBJECT_EXISTS, ctx)) {
delete ctx;
copyup_objects = true;
}
}
}

if (copyup_objects) {
send_copyup_objects();
}
}

template <typename I>
void TrimRequest<I>::send_pre_remove() {
I &image_ctx = this->m_image_ctx;
@@ -286,6 +333,39 @@ void TrimRequest<I>::send_pre_remove() {
}
}

template<typename I>
void TrimRequest<I>::send_post_copyup() {
I &image_ctx = this->m_image_ctx;
assert(image_ctx.owner_lock.is_locked());

bool pre_remove_objects = false;
{
RWLock::RLocker snap_locker(image_ctx.snap_lock);
if (image_ctx.object_map == nullptr) {
pre_remove_objects = true;
} else {
ldout(image_ctx.cct, 5) << this << " send_post_copyup:"
<< " copyup_start=" << m_copyup_start
<< " copyup_end=" << m_copyup_end << dendl;
m_state = STATE_POST_COPYUP;

assert(image_ctx.exclusive_lock->is_lock_owner());

Context *ctx = this->create_callback_context();
RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
if (!image_ctx.object_map->aio_update(m_copyup_start, m_copyup_end,
OBJECT_NONEXISTENT, OBJECT_PENDING, ctx)) {
delete ctx;
pre_remove_objects = true;
}
}
}

if (pre_remove_objects) {
send_pre_remove();
}
}

template <typename I>
void TrimRequest<I>::send_post_remove() {
I &image_ctx = this->m_image_ctx;
30 changes: 21 additions & 9 deletions src/librbd/operation/TrimRequest.h
@@ -34,14 +34,17 @@ class TrimRequest : public AsyncRequest<ImageCtxT>
* @verbatim
*
* <start> . . . . > STATE_FINISHED . . . . . . . . .
* | . .
* | . . . . . . . . . . . . .
* | . .
* v . .
* STATE_COPYUP_OBJECTS . . . . .
* | . . .
* | . . .
* v v v .
* | . . . . . . . . . . > . . . . . . . . . .
* | / . .
* STATE_PRE_COPYUP ---> STATE_COPYUP_OBJECTS . .
* | . .
* /-----------------------/ v .
* | . .
* v . .
* STATE_POST_COPYUP. . . > . . .
* | . . . . . . . . . . < . . . . . . . . . .
* | | . .
* v v v .
* STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS .
* | . . .
* /-----------------------/ . . . . . . . .
@@ -64,7 +67,9 @@ class TrimRequest : public AsyncRequest<ImageCtxT>
*/

enum State {
STATE_PRE_COPYUP,
STATE_COPYUP_OBJECTS,
STATE_POST_COPYUP,
STATE_PRE_REMOVE,
STATE_REMOVE_OBJECTS,
STATE_POST_REMOVE,
@@ -83,14 +88,21 @@ class TrimRequest : public AsyncRequest<ImageCtxT>
uint64_t m_new_size;
ProgressContext &m_prog_ctx;

uint64_t m_copyup_start;
uint64_t m_copyup_end;

TrimRequest(ImageCtxT &image_ctx, Context *on_finish,
uint64_t original_size, uint64_t new_size,
ProgressContext &prog_ctx);

void send_pre_copyup();
void send_copyup_objects();
void send_remove_objects();
void send_post_copyup();

void send_pre_remove();
void send_remove_objects();
void send_post_remove();

void send_clean_boundary();
void send_finish(int r);
};
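For orientation (an editor's sketch, not code from the commit): the happy-path ordering of the states above, as dispatched by should_complete() earlier in this diff, with the new PRE_COPYUP/COPYUP_OBJECTS/POST_COPYUP phase running before the existing remove sequence (boundary cleaning and error paths omitted).

#include <iostream>

enum State {
  STATE_PRE_COPYUP,
  STATE_COPYUP_OBJECTS,
  STATE_POST_COPYUP,
  STATE_PRE_REMOVE,
  STATE_REMOVE_OBJECTS,
  STATE_POST_REMOVE,
  STATE_FINISHED
};

// next step taken when the current state completes successfully
State next(State s) {
  switch (s) {
  case STATE_PRE_COPYUP:     return STATE_COPYUP_OBJECTS;
  case STATE_COPYUP_OBJECTS: return STATE_POST_COPYUP;
  case STATE_POST_COPYUP:    return STATE_PRE_REMOVE;
  case STATE_PRE_REMOVE:     return STATE_REMOVE_OBJECTS;
  case STATE_REMOVE_OBJECTS: return STATE_POST_REMOVE;
  default:                   return STATE_FINISHED;
  }
}

int main() {
  for (State s = STATE_PRE_COPYUP; s != STATE_FINISHED; s = next(s))
    std::cout << "state " << s << "\n";
}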
