From 2094168a53875228f39f0b94da80db19151157ce Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 4 Oct 2016 18:03:06 -0400 Subject: [PATCH 1/6] os/bluestore: pack Blob members 272 -> 264 bytes Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 7266dc423fbd8..7f41b6a1e596d 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -407,15 +407,13 @@ class BlueStore : public ObjectStore, /// in-memory blob metadata and associated cached buffers (if any) struct Blob : public boost::intrusive::set_base_hook<> { std::atomic_int nref = {0}; ///< reference count - int id = -1; ///< id, for spanning blobs only, >= 0 + int16_t id = -1; ///< id, for spanning blobs only, >= 0 + int16_t last_encoded_id = -1; ///< (ephemeral) used during encoding only SharedBlobRef shared_blob; ///< shared blob state (if any) /// refs from this shard. ephemeral if id<0, persisted if spanning. bluestore_extent_ref_map_t ref_map; - - int last_encoded_id = -1; ///< (ephemeral) used during encoding only - private: mutable bluestore_blob_t blob; ///< decoded blob metadata mutable bool dirty = true; ///< true if blob is newer than blob_bl From 7c5b77a6f8d58b3ef1560c2b744834c8dd56a948 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 4 Oct 2016 18:04:49 -0400 Subject: [PATCH 2/6] os/bluestore: pack SharedBlob 224 -> 216 bytes Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 7f41b6a1e596d..20e8e929fd31c 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -318,15 +318,15 @@ class BlueStore : public ObjectStore, struct SharedBlob : public boost::intrusive::unordered_set_base_hook<> { std::atomic_int nref = {0}; ///< reference count + // these are defined/set if the shared_blob is 'loaded' + bool loaded = false; ///< whether shared_blob_t is loaded + bluestore_shared_blob_t shared_blob; ///< the actual shared state + // these are defined/set if the blob is marked 'shared' uint64_t sbid = 0; ///< shared blob id string key; ///< key in kv store SharedBlobSet *parent_set = 0; ///< containing SharedBlobSet - // these are defined/set if the shared_blob is 'loaded' - bluestore_shared_blob_t shared_blob; ///< the actual shared state - bool loaded = false; ///< whether shared_blob_t is loaded - BufferSpace bc; ///< buffer cache SharedBlob(uint64_t i, const string& k, Cache *c); From 02f457177afa1c8ae75b0bcc3b20adf103f72199 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 4 Oct 2016 18:14:16 -0400 Subject: [PATCH 3/6] os/bluestore: make BufferSpace smaller Saves 24 bytes! BufferSpace 104 -> 80 bytes SharedBlob 216 -> 192 bytes Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 20e8e929fd31c..74a3257cbe9c5 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -207,7 +207,23 @@ class BlueStore : public ObjectStore, map> buffer_map; Cache *cache; - map writing_map; + + // we use a list here instead of std::map because it uses less memory and + // we expect this to be very small (very few IOs in flight to the same + // Blob at the same time). + list> writing_map; + + list>::iterator get_writing_map(uint64_t seq) { + for (auto p = writing_map.begin(); p != writing_map.end(); ++p) { + if (p->first == seq) { + return p; + } + } + writing_map.push_back(make_pair(seq, state_list_t())); + auto p = writing_map.end(); + --p; + return p; + } BufferSpace(Cache *c) : cache(c) { if (cache) { @@ -226,7 +242,7 @@ class BlueStore : public ObjectStore, cache->_audit("_add_buffer start"); buffer_map[b->offset].reset(b); if (b->is_writing()) { - writing_map[b->seq].push_back(*b); + get_writing_map(b->seq)->second.push_back(*b); } else { cache->_add_buffer(b, level, near); } @@ -239,11 +255,12 @@ class BlueStore : public ObjectStore, cache->_audit("_rm_buffer start"); if (p->second->is_writing()) { uint64_t seq = (*p->second.get()).seq; - auto it = writing_map.find(seq); - assert(it != writing_map.end()); + auto it = get_writing_map(seq); + assert(!it->second.empty()); it->second.erase(it->second.iterator_to(*p->second)); - if (it->second.empty()) + if (it->second.empty()) { writing_map.erase(it); + } } else { cache->_rm_buffer(p->second.get()); } From 871aa4a30072ea64d5cc9fba820ecaa525a29211 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 5 Oct 2016 09:43:07 -0400 Subject: [PATCH 4/6] os/bluestore: make Buffer smaller 160 -> 152 bytes Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 74a3257cbe9c5..416821389f2d7 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -147,16 +147,16 @@ class BlueStore : public ObjectStore, uint16_t cache_private = 0; ///< opaque (to us) value used by Cache impl uint32_t flags; ///< FLAG_* uint64_t seq; - uint64_t offset, length; + uint32_t offset, length; bufferlist data; boost::intrusive::list_member_hook<> lru_item; boost::intrusive::list_member_hook<> state_item; - Buffer(BufferSpace *space, unsigned s, uint64_t q, uint64_t o, uint64_t l, + Buffer(BufferSpace *space, unsigned s, uint64_t q, uint32_t o, uint32_t l, unsigned f = 0) : space(space), state(s), flags(f), seq(q), offset(o), length(l) {} - Buffer(BufferSpace *space, unsigned s, uint64_t q, uint64_t o, bufferlist& b, + Buffer(BufferSpace *space, unsigned s, uint64_t q, uint32_t o, bufferlist& b, unsigned f = 0) : space(space), state(s), flags(f), seq(q), offset(o), length(b.length()), data(b) {} @@ -171,11 +171,11 @@ class BlueStore : public ObjectStore, return state == STATE_WRITING; } - uint64_t end() const { + uint32_t end() const { return offset + length; } - void truncate(uint64_t newlen) { + void truncate(uint32_t newlen) { assert(newlen < length); if (data.length()) { bufferlist t; From 6bbac0e02fc27432847ee827ac0ce35315d8d766 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 5 Oct 2016 09:44:53 -0400 Subject: [PATCH 5/6] unittest_bluestore_types: more type sizeofs Signed-off-by: Sage Weil --- src/test/objectstore/test_bluestore_types.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/objectstore/test_bluestore_types.cc b/src/test/objectstore/test_bluestore_types.cc index 61b3164d1059c..833729cdbadaf 100644 --- a/src/test/objectstore/test_bluestore_types.cc +++ b/src/test/objectstore/test_bluestore_types.cc @@ -26,6 +26,9 @@ TEST(bluestore, sizeof) { P(BlueStore::extent_map_t); P(BlueStore::blob_map_t); P(BlueStore::BufferSpace); + P(BlueStore::Buffer); + P(bluestore_onode_t); + P(bluestore_blob_t); P(bluestore_extent_ref_map_t); P(bluestore_extent_ref_map_t::record_t); P(std::atomic_int); From 0932d39e28cd1e106efb697612bb405584e90154 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 5 Oct 2016 09:56:09 -0400 Subject: [PATCH 6/6] os/bluestore: simplify BufferSpace writing_map / list We were tracking buffers by seq. In reality, though, we won't have very many buffers that have in flight IO to the same blob at the same time, so we can get by with just a list here. We can rely on the fact that the list will be sorted (seq's increase, and we put new buffers at the end). This saves us a memory allocation. No change in size of BufferSpace and Blob (std::list -> intrusive::list). Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 37 +++++++++++++++++------------------ src/os/bluestore/BlueStore.h | 32 +++++++----------------------- 2 files changed, 25 insertions(+), 44 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 1e963e3e52374..dd88ac57c1f38 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -1043,29 +1043,28 @@ void BlueStore::BufferSpace::finish_write(uint64_t seq) { std::lock_guard l(cache->lock); - auto i = writing_map.begin(); - while (i != writing_map.end()) { - if (i->first > seq) + auto i = writing.begin(); + while (i != writing.end()) { + if (i->seq > seq) { break; + } + if (i->seq < seq) { + ++i; + continue; + } - auto l = i->second.begin(); - while (l != i->second.end()) { - Buffer *b = &*l; - dout(20) << __func__ << " " << *b << dendl; - assert(b->is_writing()); + Buffer *b = &*i; + dout(20) << __func__ << " " << *b << dendl; + assert(b->is_writing()); - if (b->flags & Buffer::FLAG_NOCACHE) { - i->second.erase(l++); - buffer_map.erase(b->offset); - } else { - b->state = Buffer::STATE_CLEAN; - i->second.erase(l++); - cache->_add_buffer(b, 1, nullptr); - } + if (b->flags & Buffer::FLAG_NOCACHE) { + writing.erase(i++); + buffer_map.erase(b->offset); + } else { + b->state = Buffer::STATE_CLEAN; + writing.erase(i++); + cache->_add_buffer(b, 1, nullptr); } - - assert(i->second.empty()); - writing_map.erase(i++); } cache->_audit("finish_write end"); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 416821389f2d7..ca9035b0a87b7 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -208,22 +208,10 @@ class BlueStore : public ObjectStore, map> buffer_map; Cache *cache; - // we use a list here instead of std::map because it uses less memory and - // we expect this to be very small (very few IOs in flight to the same - // Blob at the same time). - list> writing_map; - - list>::iterator get_writing_map(uint64_t seq) { - for (auto p = writing_map.begin(); p != writing_map.end(); ++p) { - if (p->first == seq) { - return p; - } - } - writing_map.push_back(make_pair(seq, state_list_t())); - auto p = writing_map.end(); - --p; - return p; - } + // we use a bare intrusive list here instead of std::map because + // it uses less memory and we expect this to be very small (very + // few IOs in flight to the same Blob at the same time). + state_list_t writing; ///< writing buffers, sorted by seq, ascending BufferSpace(Cache *c) : cache(c) { if (cache) { @@ -232,7 +220,7 @@ class BlueStore : public ObjectStore, } ~BufferSpace() { assert(buffer_map.empty()); - assert(writing_map.empty()); + assert(writing.empty()); if (cache) { cache->rm_blob(); } @@ -242,7 +230,7 @@ class BlueStore : public ObjectStore, cache->_audit("_add_buffer start"); buffer_map[b->offset].reset(b); if (b->is_writing()) { - get_writing_map(b->seq)->second.push_back(*b); + writing.push_back(*b); } else { cache->_add_buffer(b, level, near); } @@ -254,13 +242,7 @@ class BlueStore : public ObjectStore, void _rm_buffer(map>::iterator p) { cache->_audit("_rm_buffer start"); if (p->second->is_writing()) { - uint64_t seq = (*p->second.get()).seq; - auto it = get_writing_map(seq); - assert(!it->second.empty()); - it->second.erase(it->second.iterator_to(*p->second)); - if (it->second.empty()) { - writing_map.erase(it); - } + writing.erase(writing.iterator_to(*p->second)); } else { cache->_rm_buffer(p->second.get()); }