Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

os/bluestore: dedup omap_head, reuse nid instead #12275

Merged
merged 3 commits into from Dec 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
106 changes: 53 additions & 53 deletions src/os/bluestore/BlueStore.cc
Expand Up @@ -4475,13 +4475,13 @@ int BlueStore::fsck(bool deep)
}
}
// omap
if (o->onode.omap_head) {
if (used_omap_head.count(o->onode.omap_head)) {
derr << __func__ << " " << oid << " omap_head " << o->onode.omap_head
if (o->onode.has_omap()) {
if (used_omap_head.count(o->onode.nid)) {
derr << __func__ << " " << oid << " omap_head " << o->onode.nid
<< " already in use" << dendl;
++errors;
} else {
used_omap_head.insert(o->onode.omap_head);
used_omap_head.insert(o->onode.nid);
}
}
}
Expand Down Expand Up @@ -5640,17 +5640,17 @@ BlueStore::OmapIteratorImpl::OmapIteratorImpl(
: c(c), o(o), it(it)
{
RWLock::RLocker l(c->lock);
if (o->onode.omap_head) {
get_omap_key(o->onode.omap_head, string(), &head);
get_omap_tail(o->onode.omap_head, &tail);
if (o->onode.has_omap()) {
get_omap_key(o->onode.nid, string(), &head);
get_omap_tail(o->onode.nid, &tail);
it->lower_bound(head);
}
}

int BlueStore::OmapIteratorImpl::seek_to_first()
{
RWLock::RLocker l(c->lock);
if (o->onode.omap_head) {
if (o->onode.has_omap()) {
it->lower_bound(head);
} else {
it = KeyValueDB::Iterator();
Expand All @@ -5661,9 +5661,9 @@ int BlueStore::OmapIteratorImpl::seek_to_first()
int BlueStore::OmapIteratorImpl::upper_bound(const string& after)
{
RWLock::RLocker l(c->lock);
if (o->onode.omap_head) {
if (o->onode.has_omap()) {
string key;
get_omap_key(o->onode.omap_head, after, &key);
get_omap_key(o->onode.nid, after, &key);
it->upper_bound(key);
} else {
it = KeyValueDB::Iterator();
Expand All @@ -5674,9 +5674,9 @@ int BlueStore::OmapIteratorImpl::upper_bound(const string& after)
int BlueStore::OmapIteratorImpl::lower_bound(const string& to)
{
RWLock::RLocker l(c->lock);
if (o->onode.omap_head) {
if (o->onode.has_omap()) {
string key;
get_omap_key(o->onode.omap_head, to, &key);
get_omap_key(o->onode.nid, to, &key);
it->lower_bound(key);
} else {
it = KeyValueDB::Iterator();
Expand All @@ -5687,13 +5687,13 @@ int BlueStore::OmapIteratorImpl::lower_bound(const string& to)
bool BlueStore::OmapIteratorImpl::valid()
{
RWLock::RLocker l(c->lock);
return o->onode.omap_head && it->valid() && it->raw_key().second <= tail;
return o->onode.has_omap() && it->valid() && it->raw_key().second <= tail;
}

int BlueStore::OmapIteratorImpl::next(bool validate)
{
RWLock::RLocker l(c->lock);
if (o->onode.omap_head) {
if (o->onode.has_omap()) {
it->next();
return 0;
} else {
Expand Down Expand Up @@ -5749,14 +5749,14 @@ int BlueStore::omap_get(
r = -ENOENT;
goto out;
}
if (!o->onode.omap_head)
if (!o->onode.has_omap())
goto out;
o->flush();
{
KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP);
string head, tail;
get_omap_header(o->onode.omap_head, &head);
get_omap_tail(o->onode.omap_head, &tail);
get_omap_header(o->onode.nid, &head);
get_omap_tail(o->onode.nid, &tail);
it->lower_bound(head);
while (it->valid()) {
if (it->key() == head) {
Expand Down Expand Up @@ -5812,12 +5812,12 @@ int BlueStore::omap_get_header(
r = -ENOENT;
goto out;
}
if (!o->onode.omap_head)
if (!o->onode.has_omap())
goto out;
o->flush();
{
string head;
get_omap_header(o->onode.omap_head, &head);
get_omap_header(o->onode.nid, &head);
if (db->get(PREFIX_OMAP, head, header) >= 0) {
dout(30) << __func__ << " got header" << dendl;
} else {
Expand Down Expand Up @@ -5859,14 +5859,14 @@ int BlueStore::omap_get_keys(
r = -ENOENT;
goto out;
}
if (!o->onode.omap_head)
if (!o->onode.has_omap())
goto out;
o->flush();
{
KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP);
string head, tail;
get_omap_key(o->onode.omap_head, string(), &head);
get_omap_tail(o->onode.omap_head, &tail);
get_omap_key(o->onode.nid, string(), &head);
get_omap_tail(o->onode.nid, &tail);
it->lower_bound(head);
while (it->valid()) {
if (it->key() >= tail) {
Expand Down Expand Up @@ -5919,10 +5919,10 @@ int BlueStore::omap_get_values(
r = -ENOENT;
goto out;
}
if (!o->onode.omap_head)
if (!o->onode.has_omap())
goto out;
o->flush();
_key_encode_u64(o->onode.omap_head, &final_key);
_key_encode_u64(o->onode.nid, &final_key);
final_key.push_back('.');
for (set<string>::const_iterator p = keys.begin(); p != keys.end(); ++p) {
final_key.resize(9); // keep prefix
Expand Down Expand Up @@ -5972,10 +5972,10 @@ int BlueStore::omap_check_keys(
r = -ENOENT;
goto out;
}
if (!o->onode.omap_head)
if (!o->onode.has_omap())
goto out;
o->flush();
_key_encode_u64(o->onode.omap_head, &final_key);
_key_encode_u64(o->onode.nid, &final_key);
final_key.push_back('.');
for (set<string>::const_iterator p = keys.begin(); p != keys.end(); ++p) {
final_key.resize(9); // keep prefix
Expand Down Expand Up @@ -6026,7 +6026,7 @@ ObjectMap::ObjectMapIterator BlueStore::get_omap_iterator(
return ObjectMap::ObjectMapIterator();
}
o->flush();
dout(10) << __func__ << " header = " << o->onode.omap_head <<dendl;
dout(10) << __func__ << " has_omap = " << (int)o->onode.has_omap() <<dendl;
KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP);
return ObjectMap::ObjectMapIterator(new OmapIteratorImpl(c, o, it));
}
Expand Down Expand Up @@ -6526,9 +6526,9 @@ void BlueStore::_osr_reap_done(OpSequencer *osr)
txc->log_state_latency(logger, l_bluestore_state_done_lat);
delete txc;
osr->qcond.notify_all();
if (osr->q.empty())
dout(20) << __func__ << " osr " << osr << " q now empty" << dendl;
}
if (osr->q.empty())
dout(20) << __func__ << " osr " << osr << " q now empty" << dendl;
}

if (c) {
Expand Down Expand Up @@ -8254,8 +8254,8 @@ int BlueStore::_do_remove(
int r = _do_truncate(txc, c, o, 0);
if (r < 0)
return r;
if (o->onode.omap_head) {
_do_omap_clear(txc, o->onode.omap_head);
if (o->onode.has_omap()) {
_do_omap_clear(txc, o->onode.nid);
}
o->exists = false;
o->onode = bluestore_onode_t();
Expand Down Expand Up @@ -8385,9 +8385,9 @@ int BlueStore::_omap_clear(TransContext *txc,
{
dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
int r = 0;
if (o->onode.omap_head != 0) {
_do_omap_clear(txc, o->onode.omap_head);
o->onode.omap_head = 0;
if (o->onode.has_omap()) {
_do_omap_clear(txc, o->onode.nid);
o->onode.clear_omap_flag();
txc->write_onode(o);
}
dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
Expand All @@ -8403,12 +8403,12 @@ int BlueStore::_omap_setkeys(TransContext *txc,
int r;
bufferlist::iterator p = bl.begin();
__u32 num;
if (!o->onode.omap_head) {
o->onode.omap_head = o->onode.nid;
if (!o->onode.has_omap()) {
o->onode.set_omap_flag();
txc->write_onode(o);
}
string final_key;
_key_encode_u64(o->onode.omap_head, &final_key);
_key_encode_u64(o->onode.nid, &final_key);
final_key.push_back('.');
::decode(num, p);
while (num--) {
Expand All @@ -8435,11 +8435,11 @@ int BlueStore::_omap_setheader(TransContext *txc,
dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
int r;
string key;
if (!o->onode.omap_head) {
o->onode.omap_head = o->onode.nid;
if (!o->onode.has_omap()) {
o->onode.set_omap_flag();
txc->write_onode(o);
}
get_omap_header(o->onode.omap_head, &key);
get_omap_header(o->onode.nid, &key);
txc->t->set(PREFIX_OMAP, key, bl);
r = 0;
dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
Expand All @@ -8457,10 +8457,10 @@ int BlueStore::_omap_rmkeys(TransContext *txc,
__u32 num;
string final_key;

if (!o->onode.omap_head) {
if (!o->onode.has_omap()) {
goto out;
}
_key_encode_u64(o->onode.omap_head, &final_key);
_key_encode_u64(o->onode.nid, &final_key);
final_key.push_back('.');
::decode(num, p);
while (num--) {
Expand All @@ -8487,12 +8487,12 @@ int BlueStore::_omap_rmkey_range(TransContext *txc,
KeyValueDB::Iterator it;
string key_first, key_last;
int r = 0;
if (!o->onode.omap_head) {
if (!o->onode.has_omap()) {
goto out;
}
it = db->get_iterator(PREFIX_OMAP);
get_omap_key(o->onode.omap_head, first, &key_first);
get_omap_key(o->onode.omap_head, last, &key_last);
get_omap_key(o->onode.nid, first, &key_first);
get_omap_key(o->onode.nid, last, &key_last);
it->lower_bound(key_first);
while (it->valid()) {
if (it->key() >= key_last) {
Expand Down Expand Up @@ -8574,19 +8574,19 @@ int BlueStore::_clone(TransContext *txc,
newo->onode.attrs = oldo->onode.attrs;

// clone omap
if (newo->onode.omap_head) {
if (newo->onode.has_omap()) {
dout(20) << __func__ << " clearing old omap data" << dendl;
_do_omap_clear(txc, newo->onode.omap_head);
_do_omap_clear(txc, newo->onode.nid);
}
if (oldo->onode.omap_head) {
if (oldo->onode.has_omap()) {
dout(20) << __func__ << " copying omap data" << dendl;
if (!newo->onode.omap_head) {
newo->onode.omap_head = newo->onode.nid;
if (!newo->onode.has_omap()) {
newo->onode.set_omap_flag();
}
KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP);
string head, tail;
get_omap_header(oldo->onode.omap_head, &head);
get_omap_tail(oldo->onode.omap_head, &tail);
get_omap_header(oldo->onode.nid, &head);
get_omap_tail(oldo->onode.nid, &tail);
it->lower_bound(head);
while (it->valid()) {
if (it->key() >= tail) {
Expand All @@ -8596,7 +8596,7 @@ int BlueStore::_clone(TransContext *txc,
dout(30) << __func__ << " got header/data "
<< pretty_binary_string(it->key()) << dendl;
string key;
rewrite_omap_key(newo->onode.omap_head, it->key(), &key);
rewrite_omap_key(newo->onode.nid, it->key(), &key);
txc->t->set(PREFIX_OMAP, key, it->value());
}
it->next();
Expand Down
14 changes: 1 addition & 13 deletions src/os/bluestore/bluestore_types.cc
Expand Up @@ -572,7 +572,7 @@ void bluestore_onode_t::dump(Formatter *f) const
f->close_section();
}
f->close_section();
f->dump_unsigned("omap_head", omap_head);
f->dump_string("flags", get_flags_string());
f->open_array_section("extent_map_shards");
for (auto si : extent_map_shards) {
f->dump_object("shard", si);
Expand All @@ -589,18 +589,6 @@ void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o)
// FIXME
}

// FIXME: deriving ctx.csum_order from this value can lead to poor small
// random read performance when the initial writes are large.
//
// Returns 0 when no expected write size has been recorded (the field is
// zero); otherwise returns ctz(expected_write_size).
size_t bluestore_onode_t::get_preferred_csum_order() const
{
  // Guard first: a zero hint yields order 0 without calling ctz().
  if (expected_write_size == 0) {
    return 0;
  }
  return ctz(expected_write_size);
}


// bluestore_wal_op_t

void bluestore_wal_op_t::dump(Formatter *f) const
Expand Down
41 changes: 37 additions & 4 deletions src/os/bluestore/bluestore_types.h
Expand Up @@ -667,7 +667,11 @@ struct bluestore_onode_t {
uint64_t nid = 0; ///< numeric id (locally unique)
uint64_t size = 0; ///< object size
map<string, bufferptr> attrs; ///< attrs
uint64_t omap_head = 0; ///< id for omap root node
uint8_t flags = 0;

enum {
FLAG_OMAP = 1,
};

struct shard_info {
uint32_t offset = 0; ///< logical offset for start of shard
Expand All @@ -686,15 +690,44 @@ struct bluestore_onode_t {
uint32_t expected_write_size = 0;
uint32_t alloc_hint_flags = 0;

/// get preferred csum chunk size
size_t get_preferred_csum_order() const;
/// Render the onode flags as text: "omap" when FLAG_OMAP is set,
/// otherwise an empty string.
string get_flags_string() const {
  return (flags & FLAG_OMAP) ? string("omap") : string();
}

/// Return true when any bit of @p f is set in flags.
bool has_flag(unsigned f) const {
  return (flags & f) != 0;
}

/// Turn on the bits of @p f in flags.
void set_flag(unsigned f) {
  // flags is 8 bits wide; the OR is computed in unsigned and narrowed back.
  flags = static_cast<uint8_t>(flags | f);
}

/// Turn off the bits of @p f in flags.
void clear_flag(unsigned f) {
  // flags is 8 bits wide; the masked value is computed in unsigned and
  // narrowed back.
  flags = static_cast<uint8_t>(flags & ~f);
}

bool has_omap() const {
return has_flag(FLAG_OMAP);
}

void set_omap_flag() {
set_flag(FLAG_OMAP);
}

void clear_omap_flag() {
clear_flag(FLAG_OMAP);
}

DENC(bluestore_onode_t, v, p) {
DENC_START(1, 1, p);
denc(v.nid, p);
denc(v.size, p);
denc(v.attrs, p);
denc(v.omap_head, p);
denc(v.flags, p);
denc(v.extent_map_shards, p);
denc(v.expected_object_size, p);
denc(v.expected_write_size, p);
Expand Down