Skip to content

Commit

Permalink
Merge pull request #12909 from dillaman/wip-18290-jewel
Browse files Browse the repository at this point in the history
jewel: librbd: properly order concurrent updates to the object map

Reviewed-by: Jason Dillaman <dillaman@redhat.com>
  • Loading branch information
smithfarm committed Jan 30, 2017
2 parents 60bc353 + cdd6cbf commit 60a2037
Show file tree
Hide file tree
Showing 45 changed files with 964 additions and 251 deletions.
45 changes: 16 additions & 29 deletions src/librbd/AioObjectRequest.cc
Expand Up @@ -454,12 +454,10 @@ void AbstractAioObjectWrite::send() {
void AbstractAioObjectWrite::send_pre() {
assert(m_ictx->owner_lock.is_locked());

bool write = false;
{
RWLock::RLocker snap_lock(m_ictx->snap_lock);
if (m_ictx->object_map == nullptr) {
m_object_exist = true;
write = true;
} else {
// should have been flushed prior to releasing lock
assert(m_ictx->exclusive_lock->is_lock_owner());
Expand All @@ -469,27 +467,20 @@ void AbstractAioObjectWrite::send_pre() {
pre_object_map_update(&new_state);

RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
if (m_ictx->object_map->update_required(m_object_no, new_state)) {
ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " "
<< m_object_off << "~" << m_object_len
<< dendl;
m_state = LIBRBD_AIO_WRITE_PRE;

Context *ctx = util::create_context_callback<AioObjectRequest>(this);
bool updated = m_ictx->object_map->aio_update(m_object_no, new_state,
{}, ctx);
assert(updated);
} else {
write = true;
ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " "
<< m_object_off << "~" << m_object_len
<< dendl;
m_state = LIBRBD_AIO_WRITE_PRE;

if (m_ictx->object_map->aio_update<AioObjectRequest>(
CEPH_NOSNAP, m_object_no, new_state, {}, this)) {
return;
}
}
}

// avoid possible recursive lock attempts
if (write) {
// no object map update required
send_write();
}
// no object map update required
send_write();
}

bool AbstractAioObjectWrite::send_post() {
Expand All @@ -503,20 +494,16 @@ bool AbstractAioObjectWrite::send_post() {
assert(m_ictx->exclusive_lock->is_lock_owner());

RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
if (!m_ictx->object_map->update_required(m_object_no, OBJECT_NONEXISTENT)) {
return true;
}

ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " "
<< m_object_off << "~" << m_object_len << dendl;
m_state = LIBRBD_AIO_WRITE_POST;

Context *ctx = util::create_context_callback<AioObjectRequest>(this);
bool updated = m_ictx->object_map->aio_update(m_object_no,
OBJECT_NONEXISTENT,
OBJECT_PENDING, ctx);
assert(updated);
return false;
if (m_ictx->object_map->aio_update<AioObjectRequest>(
CEPH_NOSNAP, m_object_no, OBJECT_NONEXISTENT, OBJECT_PENDING, this)) {
return false;
}

return true;
}

void AbstractAioObjectWrite::send_write() {
Expand Down
172 changes: 172 additions & 0 deletions src/librbd/BlockGuard.h
@@ -0,0 +1,172 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_LIBRBD_IO_BLOCK_GUARD_H
#define CEPH_LIBRBD_IO_BLOCK_GUARD_H

#include "include/int_types.h"
#include "common/dout.h"
#include "common/Mutex.h"
#include <boost/intrusive/list.hpp>
#include <boost/intrusive/set.hpp>
#include <deque>
#include <list>
#include "include/assert.h"

#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
#define dout_prefix *_dout << "librbd::BlockGuard: " << this << " " \
<< __func__ << ": "

namespace librbd {

// A contiguous span of blocks, from block_start up to block_end.
// Default-constructed extents cover the empty range [0, 0).
struct BlockExtent {
  uint64_t block_start = 0;
  uint64_t block_end = 0;

  BlockExtent() = default;
  BlockExtent(uint64_t start, uint64_t end)
    : block_start(start), block_end(end) {
  }
};

// Opaque handle representing ownership of a detained block extent: filled in
// by BlockGuard::detain() (via a cast from its internal entry) and handed
// back to BlockGuard::release() to free the extent and collect queued ops.
struct BlockGuardCell {
};

/**
 * Helper class to restrict and order concurrent IO to the same block. The
 * definition of a block is dependent upon the user of this class. It might
 * represent a backing object, 512 byte sectors, etc.
 *
 * Extents that overlap compare as equivalent (see DetainedBlockExtentCompare),
 * so a detain() against a range overlapping an already-detained range queues
 * behind the existing owner instead of proceeding.
 */
template <typename BlockOperation>
class BlockGuard {
private:
  struct DetainedBlockExtent;

public:
  // FIFO list of operations queued behind a detained extent
  typedef std::list<BlockOperation> BlockOperations;

  BlockGuard(CephContext *cct)
    : m_cct(cct), m_lock("librbd::BlockGuard::m_lock") {
  }

  // non-copyable: owns a mutex plus intrusive containers whose hooks must
  // reference a single pool
  BlockGuard(const BlockGuard&) = delete;
  BlockGuard &operator=(const BlockGuard&) = delete;

  /**
   * Detain future IO for a range of blocks. The guard will assume
   * ownership of the provided operation if the operation is blocked.
   *
   * @param block_extent range of blocks to detain
   * @param block_operation operation to queue if the range is already
   *        detained (moved-from in that case); may be nullptr
   * @param cell [out] opaque handle to later pass to release(); set to
   *        nullptr when the IO was queued instead of admitted
   * @return 0 upon success and IO can be issued
   *         >0 if the IO is blocked,
   *         <0 upon error
   */
  int detain(const BlockExtent &block_extent, BlockOperation *block_operation,
             BlockGuardCell **cell) {
    Mutex::Locker locker(m_lock);
    ldout(m_cct, 20) << "block_start=" << block_extent.block_start << ", "
                     << "block_end=" << block_extent.block_end << ", "
                     << "free_slots=" << m_free_detained_block_extents.size()
                     << dendl;

    DetainedBlockExtent *detained_block_extent;
    // equivalence under the set's comparator means "overlaps", so this finds
    // any detained extent overlapping the requested range
    auto it = m_detained_block_extents.find(block_extent);
    if (it != m_detained_block_extents.end()) {
      // request against an already detained block
      detained_block_extent = &(*it);
      if (block_operation != nullptr) {
        detained_block_extent->block_operations.emplace_back(
          std::move(*block_operation));
      }

      // alert the caller that the IO was detained
      *cell = nullptr;
      // NOTE(review): when block_operation == nullptr and no ops are queued
      // yet, this returns 0 ("can be issued") even though *cell is nullptr --
      // presumably callers always pass a non-null operation here; confirm
      return detained_block_extent->block_operations.size();
    } else {
      if (!m_free_detained_block_extents.empty()) {
        // recycle an entry from the free list; clear any stale ops left over
        // from its previous use
        detained_block_extent = &m_free_detained_block_extents.front();
        detained_block_extent->block_operations.clear();
        m_free_detained_block_extents.pop_front();
      } else {
        // grow the pool; std::deque growth never invalidates references to
        // existing elements, so pointers into the pool stay valid
        ldout(m_cct, 20) << "no free detained block cells" << dendl;
        m_detained_block_extent_pool.emplace_back();
        detained_block_extent = &m_detained_block_extent_pool.back();
      }

      detained_block_extent->block_extent = block_extent;
      m_detained_block_extents.insert(*detained_block_extent);
      // the cell handed to the caller is just a type-erased pointer to the
      // pool entry
      *cell = reinterpret_cast<BlockGuardCell*>(detained_block_extent);
      return 0;
    }
  }

  /**
   * Release any detained IO operations from the provided cell.
   *
   * @param cell non-null handle previously produced by detain()
   * @param block_operations [out] receives (by move) all operations that
   *        queued up while the extent was detained; may be empty
   */
  void release(BlockGuardCell *cell, BlockOperations *block_operations) {
    Mutex::Locker locker(m_lock);

    assert(cell != nullptr);
    // undo the type erasure performed by detain()
    auto &detained_block_extent = reinterpret_cast<DetainedBlockExtent &>(
      *cell);
    ldout(m_cct, 20) << "block_start="
                     << detained_block_extent.block_extent.block_start << ", "
                     << "block_end="
                     << detained_block_extent.block_extent.block_end << ", "
                     << "pending_ops="
                     << (detained_block_extent.block_operations.empty() ?
                          0 : detained_block_extent.block_operations.size() - 1)
                     << dendl;

    *block_operations = std::move(detained_block_extent.block_operations);
    // erase by key: BlockExtent implicitly converts to a temporary
    // DetainedBlockExtent for the comparator lookup
    m_detained_block_extents.erase(detained_block_extent.block_extent);
    // recycle the entry for future detain() calls
    m_free_detained_block_extents.push_back(detained_block_extent);
  }

private:
  // pool-allocated entry; storage lives in m_detained_block_extent_pool and
  // the entry is linked into either the free list or the detained set via
  // its intrusive base hooks
  struct DetainedBlockExtent : public boost::intrusive::list_base_hook<>,
                               public boost::intrusive::set_base_hook<> {
    DetainedBlockExtent() {
    }
    DetainedBlockExtent(const BlockExtent &block_extent)
      : block_extent(block_extent) {
    }

    BlockExtent block_extent;
    BlockOperations block_operations;
  };

  // ordering in which overlapping extents compare equivalent: lhs < rhs only
  // when lhs ends at or before rhs begins (block_end treated as exclusive)
  struct DetainedBlockExtentCompare {
    bool operator()(const DetainedBlockExtent &lhs,
                    const DetainedBlockExtent &rhs) const {
      // check for range overlap (lhs < rhs)
      if (lhs.block_extent.block_end <= rhs.block_extent.block_start) {
        return true;
      }
      return false;
    }
  };

  typedef std::deque<DetainedBlockExtent> DetainedBlockExtentsPool;
  typedef boost::intrusive::list<DetainedBlockExtent> DetainedBlockExtents;
  typedef boost::intrusive::set<
    DetainedBlockExtent,
    boost::intrusive::compare<DetainedBlockExtentCompare> >
      BlockExtentToDetainedBlockExtents;

  CephContext *m_cct;

  Mutex m_lock;  // protects all containers below
  // owns the storage for every DetainedBlockExtent ever created
  DetainedBlockExtentsPool m_detained_block_extent_pool;
  // entries available for reuse by detain()
  DetainedBlockExtents m_free_detained_block_extents;
  // currently-detained entries, keyed (with overlap equivalence) by extent
  BlockExtentToDetainedBlockExtents m_detained_block_extents;

};

} // namespace librbd

#undef dout_subsys
#undef dout_prefix
#define dout_prefix *_dout

#endif // CEPH_LIBRBD_IO_BLOCK_GUARD_H
10 changes: 5 additions & 5 deletions src/librbd/CopyupRequest.cc
Expand Up @@ -46,9 +46,8 @@ class UpdateObjectMap : public C_AsyncObjectThrottle<> {
RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
assert(m_image_ctx.exclusive_lock->is_lock_owner());
assert(m_image_ctx.object_map != nullptr);
bool sent = m_image_ctx.object_map->aio_update(m_object_no, OBJECT_EXISTS,
boost::optional<uint8_t>(),
this);
bool sent = m_image_ctx.object_map->aio_update<Context>(
CEPH_NOSNAP, m_object_no, OBJECT_EXISTS, {}, this);
return (sent ? 0 : 1);
}

Expand All @@ -64,8 +63,9 @@ class UpdateObjectMap : public C_AsyncObjectThrottle<> {
return 1;
}

m_image_ctx.object_map->aio_update(snap_id, m_object_no, m_object_no + 1,
state, boost::optional<uint8_t>(), this);
bool sent = m_image_ctx.object_map->aio_update<Context>(
snap_id, m_object_no, state, {}, this);
assert(sent);
return 0;
}

Expand Down
4 changes: 2 additions & 2 deletions src/librbd/DiffIterate.cc
Expand Up @@ -385,8 +385,8 @@ int DiffIterate::diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,
}

BitVector<2> object_map;
std::string oid(ObjectMap::object_map_name(m_image_ctx.id,
current_snap_id));
std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id,
current_snap_id));
r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, &object_map);
if (r < 0) {
lderr(cct) << "diff_object_map: failed to load object map " << oid
Expand Down
4 changes: 2 additions & 2 deletions src/librbd/ImageCtx.cc
Expand Up @@ -995,8 +995,8 @@ struct C_InvalidateCache : public Context {
return new ExclusiveLock<ImageCtx>(*this);
}

ObjectMap *ImageCtx::create_object_map(uint64_t snap_id) {
return new ObjectMap(*this, snap_id);
ObjectMap<ImageCtx> *ImageCtx::create_object_map(uint64_t snap_id) {
return new ObjectMap<ImageCtx>(*this, snap_id);
}

Journal<ImageCtx> *ImageCtx::create_journal() {
Expand Down
6 changes: 3 additions & 3 deletions src/librbd/ImageCtx.h
Expand Up @@ -45,7 +45,7 @@ namespace librbd {
template <typename> class ImageWatcher;
template <typename> class Journal;
class LibrbdAdminSocketHook;
class ObjectMap;
template <typename> class ObjectMap;
template <typename> class Operations;
class LibrbdWriteback;

Expand Down Expand Up @@ -141,7 +141,7 @@ namespace librbd {
Operations<ImageCtx> *operations;

ExclusiveLock<ImageCtx> *exclusive_lock;
ObjectMap *object_map;
ObjectMap<ImageCtx> *object_map;

xlist<operation::ResizeRequest<ImageCtx>*> resize_reqs;

Expand Down Expand Up @@ -287,7 +287,7 @@ namespace librbd {
void apply_metadata(const std::map<std::string, bufferlist> &meta);

ExclusiveLock<ImageCtx> *create_exclusive_lock();
ObjectMap *create_object_map(uint64_t snap_id);
ObjectMap<ImageCtx> *create_object_map(uint64_t snap_id);
Journal<ImageCtx> *create_journal();

void clear_pending_completions();
Expand Down
1 change: 1 addition & 0 deletions src/librbd/Makefile.am
Expand Up @@ -97,6 +97,7 @@ noinst_HEADERS += \
librbd/AsyncObjectThrottle.h \
librbd/AsyncOperation.h \
librbd/AsyncRequest.h \
librbd/BlockGuard.h \
librbd/CopyupRequest.h \
librbd/DiffIterate.h \
librbd/ExclusiveLock.h \
Expand Down

0 comments on commit 60a2037

Please sign in to comment.