Skip to content

Commit

Permalink
Merge pull request #12394 from ifed01/wip-bluestore-fix-reshard
Browse files Browse the repository at this point in the history
os/bluestore: refactor ExtentMap::update to avoid preceeding db updat…

Reviewed-by: Sage Weil <sage@redhat.com>
  • Loading branch information
liewegas committed Dec 22, 2016
2 parents 7a15752 + c66977b commit fcbabdd
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 29 deletions.
81 changes: 55 additions & 26 deletions src/os/bluestore/BlueStore.cc
Expand Up @@ -1608,12 +1608,11 @@ BlueStore::ExtentMap::ExtentMap(Onode *o)
inline_bl(g_conf->bluestore_extent_map_inline_shard_prealloc_size) {
}


bool BlueStore::ExtentMap::update(Onode *o, KeyValueDB::Transaction t,
bool force)
bool BlueStore::ExtentMap::update(KeyValueDB::Transaction t,
bool force)
{
assert(!needs_reshard);
if (o->onode.extent_map_shards.empty()) {
if (onode->onode.extent_map_shards.empty()) {
if (inline_bl.length() == 0) {
unsigned n;
// we need to encode inline_bl to measure encoded length
Expand All @@ -1628,7 +1627,29 @@ bool BlueStore::ExtentMap::update(Onode *o, KeyValueDB::Transaction t,
}
// will persist in the onode key, see below
} else {

struct dirty_shard_t{
string* key = nullptr;
bufferlist bl;
boost::intrusive::list_member_hook<> dirty_list_item;
};

typedef boost::intrusive::list<
dirty_shard_t,
boost::intrusive::member_hook<
dirty_shard_t,
boost::intrusive::list_member_hook<>,
&dirty_shard_t::dirty_list_item> > dirty_shard_list_t;

vector<dirty_shard_t> encoded_shards;
//allocate slots for all shards in a single call instead of
//doing multiple allocations - one per each dirty shard
encoded_shards.resize(shards.size());

dirty_shard_list_t dirty_shards;

auto p = shards.begin();
unsigned pos = 0;
while (p != shards.end()) {
auto n = p;
++n;
Expand All @@ -1639,8 +1660,8 @@ bool BlueStore::ExtentMap::update(Onode *o, KeyValueDB::Transaction t,
} else {
endoff = n->offset;
}
bufferlist bl;
unsigned nn;
bufferlist& bl = encoded_shards[pos].bl;
if (encode_some(p->offset, endoff - p->offset, bl, &nn)) {
return true;
}
Expand All @@ -1654,24 +1675,32 @@ bool BlueStore::ExtentMap::update(Onode *o, KeyValueDB::Transaction t,
//non-last shard size is below the min threshold. The last check is to avoid potential
//unneeded reshardings since this might happen permanently.
if (!force &&
(len > g_conf->bluestore_extent_map_shard_max_size ||
(n != shards.end() && len < g_conf->bluestore_extent_map_shard_min_size)
)) {
(len > g_conf->bluestore_extent_map_shard_max_size ||
(n != shards.end() && len < g_conf->bluestore_extent_map_shard_min_size))) {
return true;
}
assert(p->shard_info->offset == p->offset);
p->shard_info->bytes = len;
p->shard_info->extents = nn;
t->set(PREFIX_OBJ, p->key, bl);

encoded_shards[pos].key = &p->key;
dirty_shards.push_back(encoded_shards[pos]);
p->dirty = false;
}
p = n;
++pos;
}
//schedule DB update for dirty shards
auto it = dirty_shards.begin();
while(it != dirty_shards.end()) {
t->set(PREFIX_OBJ, *(it->key), it->bl);
++it;
}
}
return false;
}

void BlueStore::ExtentMap::reshard(Onode *o, uint64_t min_alloc_size)
void BlueStore::ExtentMap::reshard(uint64_t min_alloc_size)
{
needs_reshard = false;

Expand All @@ -1686,15 +1715,15 @@ void BlueStore::ExtentMap::reshard(Onode *o, uint64_t min_alloc_size)
if (extent_map.size() <= 1) {
dout(20) << __func__ << " <= 1 extent, going inline" << dendl;
shards.clear();
o->onode.extent_map_shards.clear();
onode->onode.extent_map_shards.clear();
return;
}

unsigned bytes = 0;
if (o->onode.extent_map_shards.empty()) {
if (onode->onode.extent_map_shards.empty()) {
bytes = inline_bl.length();
} else {
for (auto &s : o->onode.extent_map_shards) {
for (auto &s : onode->onode.extent_map_shards) {
bytes += s.bytes;
}
}
Expand Down Expand Up @@ -1736,17 +1765,17 @@ void BlueStore::ExtentMap::reshard(Onode *o, uint64_t min_alloc_size)
max_blob_end = be;
}
}
o->onode.extent_map_shards.swap(new_shard_info);
onode->onode.extent_map_shards.swap(new_shard_info);

// set up new shards vector; ensure shards/inline both dirty/invalidated.
init_shards(o, true, true);
init_shards(true, true);
inline_bl.clear();

// identify spanning blobs
if (!o->onode.extent_map_shards.empty()) {
if (!onode->onode.extent_map_shards.empty()) {
dout(20) << __func__ << " checking for spanning blobs" << dendl;
auto sp = o->onode.extent_map_shards.begin();
auto esp = o->onode.extent_map_shards.end();
auto sp = onode->onode.extent_map_shards.begin();
auto esp = onode->onode.extent_map_shards.end();
unsigned shard_start = 0;
unsigned shard_end;
++sp;
Expand Down Expand Up @@ -2015,12 +2044,12 @@ void BlueStore::ExtentMap::decode_spanning_blobs(
}
}

void BlueStore::ExtentMap::init_shards(Onode *on, bool loaded, bool dirty)
void BlueStore::ExtentMap::init_shards(bool loaded, bool dirty)
{
shards.resize(on->onode.extent_map_shards.size());
shards.resize(onode->onode.extent_map_shards.size());
unsigned i = 0;
for (auto &s : on->onode.extent_map_shards) {
get_extent_shard_key(on->key, s.offset, &shards[i].key);
for (auto &s : onode->onode.extent_map_shards) {
get_extent_shard_key(onode->key, s.offset, &shards[i].key);
shards[i].offset = s.offset;
shards[i].shard_info = &s;
shards[i].loaded = loaded;
Expand Down Expand Up @@ -2456,7 +2485,7 @@ BlueStore::OnodeRef BlueStore::Collection::get_onode(
denc(on->extent_map.inline_bl, p);
on->extent_map.decode_some(on->extent_map.inline_bl);
} else {
on->extent_map.init_shards(on, false, false);
on->extent_map.init_shards(false, false);
}
}
o.reset(on);
Expand Down Expand Up @@ -6398,16 +6427,16 @@ void BlueStore::_txc_write_nodes(TransContext *txc, KeyValueDB::Transaction t)
// finalize extent_map shards
bool reshard = o->extent_map.needs_reshard;
if (!reshard) {
reshard = o->extent_map.update(o.get(), t, false);
reshard = o->extent_map.update(t, false);
}
if (reshard) {
dout(20) << __func__ << " resharding extents for " << o->oid << dendl;
for (auto &s : o->extent_map.shards) {
t->rmkey(PREFIX_OBJ, s.key);
}
o->extent_map.fault_range(db, 0, o->onode.size);
o->extent_map.reshard(o.get(), min_alloc_size);
reshard = o->extent_map.update(o.get(), t, true);
o->extent_map.reshard(min_alloc_size);
reshard = o->extent_map.update(t, true);
if (reshard) {
dout(20) << __func__ << " warning: still wants reshard, check options?"
<< dendl;
Expand Down
6 changes: 3 additions & 3 deletions src/os/bluestore/BlueStore.h
Expand Up @@ -686,11 +686,11 @@ class BlueStore : public ObjectStore,
return p->second;
}

bool update(Onode *on, KeyValueDB::Transaction t, bool force);
void reshard(Onode *on, uint64_t min_alloc_size);
bool update(KeyValueDB::Transaction t, bool force);
void reshard(uint64_t min_alloc_size);

/// initialize Shards from the onode
void init_shards(Onode *on, bool loaded, bool dirty);
void init_shards(bool loaded, bool dirty);

/// return index of shard containing offset
/// or -1 if not found
Expand Down

0 comments on commit fcbabdd

Please sign in to comment.