diff --git a/do_cmake.sh b/do_cmake.sh index 9d3c3c1ed4457..71340376695d2 100755 --- a/do_cmake.sh +++ b/do_cmake.sh @@ -8,9 +8,13 @@ mkdir build cd build cmake $@ .. +# minimal config to find plugins cat <<EOF > ceph.conf plugin dir = lib erasure code dir = lib EOF +# give vstart a (hopefully) unique mon port to start with +echo $(( RANDOM % 1000 + 40000 )) > .ceph_port + echo done. diff --git a/src/common/config_opts.h b/src/common/config_opts.h index a96b8b30044e0..8e45b4b310321 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -990,7 +990,7 @@ OPTION(bluestore_extent_map_shard_min_size, OPT_U32, 150) OPTION(bluestore_extent_map_shard_target_size_slop, OPT_DOUBLE, .2) OPTION(bluestore_extent_map_inline_shard_prealloc_size, OPT_U32, 256) OPTION(bluestore_cache_type, OPT_STR, "2q") // lru, 2q -OPTION(bluestore_onode_cache_size, OPT_U32, 16*1024) +OPTION(bluestore_onode_cache_size, OPT_U32, 4*1024) OPTION(bluestore_buffer_cache_size, OPT_U32, 512*1024*1024) OPTION(bluestore_shared_blob_hash_table_size_ratio, OPT_FLOAT, 2) // multiple of onode_cache_size OPTION(bluestore_kvbackend, OPT_STR, "rocksdb") diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 5fa3e178a00f8..7c48cdc5d8f00 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -1172,7 +1172,6 @@ ostream& operator<<(ostream& out, const BlueStore::SharedBlob& sb) out << "SharedBlob(" << &sb; if (sb.sbid) { out << " sbid 0x" << std::hex << sb.sbid << std::dec; - assert(sb.parent_set); } if (sb.loaded) { out << " loaded " << sb.shared_blob; @@ -1720,14 +1719,16 @@ void BlueStore::ExtentMap::decode_some(bufferlist& bl) le->blob_depth = 1; } if (blobid & BLOBID_FLAG_SPANNING) { - le->blob = get_spanning_blob(blobid >> BLOBID_SHIFT_BITS); + le->assign_blob(get_spanning_blob(blobid >> BLOBID_SHIFT_BITS)); } else { blobid >>= BLOBID_SHIFT_BITS; if (blobid) { - le->blob = blobs[blobid - 1]; + le->assign_blob(blobs[blobid - 1]); assert(le->blob); } 
else { - le->blob = new Blob(); - le->blob->decode(p); - blobs[n] = le->blob; - onode->c->open_shared_blob(le->blob); + // open the shared blob before assign_blob(): it dereferences blob->shared_blob to bump the cache's extent count + BlobRef b(new Blob()); + b->decode(p); + blobs[n] = b; + onode->c->open_shared_blob(b); + le->assign_blob(b); @@ -2064,7 +2065,8 @@ BlueStore::Collection::Collection(BlueStore *ns, Cache *c, coll_t cid) exists(true), // size the shared blob hash table as a ratio of the onode cache size. shared_blob_set(MAX(16, - g_conf->bluestore_onode_cache_size * + g_conf->bluestore_onode_cache_size / + store->cache_shards.size() * g_conf->bluestore_shared_blob_hash_table_size_ratio)), onode_map(c) { @@ -2190,6 +2192,13 @@ BlueStore::OnodeRef BlueStore::Collection::get_onode( return onode_map.add(oid, o); } +void BlueStore::Collection::trim_cache() +{ + cache->trim( + g_conf->bluestore_onode_cache_size / store->cache_shards.size(), + g_conf->bluestore_buffer_cache_size / store->cache_shards.size()); +} + // ======================================================= @@ -2414,7 +2423,7 @@ void BlueStore::_init_logger() "Sum for wal write op"); b.add_u64(l_bluestore_wal_write_bytes, "wal_write_bytes", "Sum for wal write bytes"); - b.add_u64(l_bluestore_write_penalty_read_ops, " write_penalty_read_ops", + b.add_u64(l_bluestore_write_penalty_read_ops, "write_penalty_read_ops", "Sum for write penalty read ops"); b.add_u64(l_bluestore_allocated, "bluestore_allocated", "Sum for allocated bytes"); @@ -2427,10 +2436,20 @@ void BlueStore::_init_logger() b.add_u64(l_bluestore_compressed_original, "bluestore_compressed_original", "Sum for original bytes that were compressed"); + b.add_u64(l_bluestore_onodes, "bluestore_onodes", + "Number of onodes in cache"); b.add_u64(l_bluestore_onode_hits, "bluestore_onode_hits", "Sum for onode-lookups hit in the cache"); b.add_u64(l_bluestore_onode_misses, "bluestore_onode_misses", "Sum for onode-lookups missed in the cache"); + b.add_u64(l_bluestore_extents, "bluestore_extents", + "Number of extents in cache"); + b.add_u64(l_bluestore_blobs, "bluestore_blobs", + "Number of blobs in cache"); + b.add_u64(l_bluestore_buffers, 
"bluestore_buffers", + "Number of buffers in cache"); + b.add_u64(l_bluestore_buffer_bytes, "bluestore_buffer_bytes", + "Number of buffer bytes in cache"); b.add_u64(l_bluestore_buffer_hit_bytes, "bluestore_buffer_hit_bytes", "Sum for bytes of read hit in the cache"); b.add_u64(l_bluestore_buffer_miss_bytes, "bluestore_buffer_miss_bytes", @@ -4508,6 +4527,24 @@ void BlueStore::_reap_collections() } } +void BlueStore::_update_cache_logger() +{ + uint64_t num_onodes = 0; + uint64_t num_extents = 0; + uint64_t num_blobs = 0; + uint64_t num_buffers = 0; + uint64_t num_buffer_bytes = 0; + for (auto c : cache_shards) { + c->add_stats(&num_onodes, &num_extents, &num_blobs, + &num_buffers, &num_buffer_bytes); + } + logger->set(l_bluestore_onodes, num_onodes); + logger->set(l_bluestore_extents, num_extents); + logger->set(l_bluestore_blobs, num_blobs); + logger->set(l_bluestore_buffers, num_buffers); + logger->set(l_bluestore_buffer_bytes, num_buffer_bytes); +} + // --------------- // read operations @@ -4540,10 +4577,7 @@ bool BlueStore::exists(CollectionHandle &c_, const ghobject_t& oid) r = false; } - c->cache->trim( - g_conf->bluestore_onode_cache_size, - g_conf->bluestore_buffer_cache_size); - + c->trim_cache(); return r; } @@ -4581,9 +4615,7 @@ int BlueStore::stat( st->st_nlink = 1; } - c->cache->trim( - g_conf->bluestore_onode_cache_size, - g_conf->bluestore_buffer_cache_size); + c->trim_cache(); int r = 0; if (_debug_mdata_eio(oid)) { r = -EIO; @@ -4643,9 +4675,7 @@ int BlueStore::read( out: assert(allow_eio || r != -EIO); - c->cache->trim( - g_conf->bluestore_onode_cache_size, - g_conf->bluestore_buffer_cache_size); + c->trim_cache(); if (r == 0 && _debug_data_eio(oid)) { r = -EIO; derr << __func__ << " " << c->cid << " " << oid << " INJECT EIO" << dendl; @@ -5040,9 +5070,7 @@ int BlueStore::fiemap( } out: - c->cache->trim( - g_conf->bluestore_onode_cache_size, - g_conf->bluestore_buffer_cache_size); + c->trim_cache(); ::encode(m, bl); dout(20) << __func__ << " 0x" 
<< std::hex << offset << "~" << length << " size = 0x(" << m << ")" << std::dec << dendl; @@ -5091,9 +5119,7 @@ int BlueStore::getattr( r = 0; } out: - c->cache->trim( - g_conf->bluestore_onode_cache_size, - g_conf->bluestore_buffer_cache_size); + c->trim_cache(); if (r == 0 && _debug_mdata_eio(oid)) { r = -EIO; derr << __func__ << " " << c->cid << " " << oid << " INJECT EIO" << dendl; @@ -5139,9 +5165,7 @@ int BlueStore::getattrs( } out: - c->cache->trim( - g_conf->bluestore_onode_cache_size, - g_conf->bluestore_buffer_cache_size); + c->trim_cache(); if (r == 0 && _debug_mdata_eio(oid)) { r = -EIO; derr << __func__ << " " << c->cid << " " << oid << " INJECT EIO" << dendl; @@ -5322,9 +5346,7 @@ int BlueStore::collection_list( } out: - c->cache->trim( - g_conf->bluestore_onode_cache_size, - g_conf->bluestore_buffer_cache_size); + c->trim_cache(); dout(10) << __func__ << " " << c->cid << " start " << start << " end " << end << " max " << max << " = " << r << ", ls.size() = " << ls->size() @@ -6172,9 +6194,7 @@ void BlueStore::_osr_reap_done(OpSequencer *osr) } if (c) { - c->cache->trim( - g_conf->bluestore_onode_cache_size, - g_conf->bluestore_buffer_cache_size); + c->trim_cache(); } } @@ -6357,6 +6377,8 @@ void BlueStore::_kv_sync_thread() // this is as good a place as any ... 
_reap_collections(); + _update_cache_logger(); + if (bluefs) { if (!bluefs_gift_extents.empty()) { _commit_bluefs_freespace(bluefs_gift_extents); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index cc2709c2fc9da..7266dc423fbd8 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -72,8 +72,13 @@ enum { l_bluestore_compressed, l_bluestore_compressed_allocated, l_bluestore_compressed_original, + l_bluestore_onodes, l_bluestore_onode_hits, l_bluestore_onode_misses, + l_bluestore_extents, + l_bluestore_blobs, + l_bluestore_buffers, + l_bluestore_buffer_bytes, l_bluestore_buffer_hit_bytes, l_bluestore_buffer_miss_bytes, l_bluestore_write_big, @@ -204,10 +209,17 @@ class BlueStore : public ObjectStore, Cache *cache; map writing_map; - BufferSpace(Cache *c) : cache(c) {} + BufferSpace(Cache *c) : cache(c) { + if (cache) { + cache->add_blob(); + } + } ~BufferSpace() { assert(buffer_map.empty()); assert(writing_map.empty()); + if (cache) { + cache->rm_blob(); + } } void _add_buffer(Buffer *b, int level, Buffer *near) { @@ -509,10 +521,27 @@ class BlueStore : public ObjectStore, uint8_t blob_depth; /// blob overlapping count BlobRef blob; ///< the blob with our data - explicit Extent() {} - explicit Extent(uint32_t lo) : logical_offset(lo) {} + /// ctor for lookup only + explicit Extent(uint32_t lo) : logical_offset(lo) { } + /// ctor for delayed initialization (see decode_some()) + explicit Extent() { + } + /// ctor for general usage Extent(uint32_t lo, uint32_t o, uint32_t l, uint8_t bd, BlobRef& b) - : logical_offset(lo), blob_offset(o), length(l), blob_depth(bd), blob(b){} + : logical_offset(lo), blob_offset(o), length(l), blob_depth(bd) { + assign_blob(b); + } + ~Extent() { + if (blob) { + blob->shared_blob->bc.cache->rm_extent(); + } + } + + void assign_blob(const BlobRef& b) { + assert(!blob); + blob = b; + blob->shared_blob->bc.cache->add_extent(); + } // comparators for intrusive_set friend bool operator<(const 
Extent &a, const Extent &b) { @@ -699,6 +728,9 @@ class BlueStore : public ObjectStore, PerfCounters *logger; std::recursive_mutex lock; ///< protect lru and other structures + std::atomic<uint64_t> num_extents = {0}; + std::atomic<uint64_t> num_blobs = {0}; + static Cache *create(string type, PerfCounters *logger); virtual ~Cache() {} @@ -712,8 +744,27 @@ class BlueStore : public ObjectStore, virtual void _adjust_buffer_size(Buffer *b, int64_t delta) = 0; virtual void _touch_buffer(Buffer *b) = 0; + void add_extent() { + ++num_extents; + } + void rm_extent() { + --num_extents; + } + + void add_blob() { + ++num_blobs; + } + void rm_blob() { + --num_blobs; + } + virtual void trim(uint64_t onode_max, uint64_t buffer_max) = 0; + virtual void add_stats(uint64_t *onodes, uint64_t *extents, + uint64_t *blobs, + uint64_t *buffers, + uint64_t *bytes) = 0; + #ifdef DEBUG_CACHE virtual void _audit(const char *s) = 0; #else @@ -785,6 +836,18 @@ class BlueStore : public ObjectStore, void trim(uint64_t onode_max, uint64_t buffer_max) override; + void add_stats(uint64_t *onodes, uint64_t *extents, + uint64_t *blobs, + uint64_t *buffers, + uint64_t *bytes) override { + std::lock_guard<std::recursive_mutex> l(lock); + *onodes += onode_lru.size(); + *extents += num_extents; + *blobs += num_blobs; + *buffers += buffer_lru.size(); + *bytes += buffer_size; + } + #ifdef DEBUG_CACHE void _audit(const char *s) override; #endif @@ -860,6 +923,18 @@ class BlueStore : public ObjectStore, void trim(uint64_t onode_max, uint64_t buffer_max) override; + void add_stats(uint64_t *onodes, uint64_t *extents, + uint64_t *blobs, + uint64_t *buffers, + uint64_t *bytes) override { + std::lock_guard<std::recursive_mutex> l(lock); + *onodes += onode_lru.size(); + *extents += num_extents; + *blobs += num_blobs; + *buffers += buffer_hot.size() + buffer_warm_in.size(); + *bytes += buffer_bytes; + } + #ifdef DEBUG_CACHE void _audit(const char *s) override; #endif @@ -940,6 +1015,8 @@ class BlueStore : public ObjectStore, return false; } + void trim_cache(); + 
Collection(BlueStore *ns, Cache *ca, coll_t c); }; typedef boost::intrusive_ptr<Collection> CollectionRef; @@ -1414,6 +1491,7 @@ class BlueStore : public ObjectStore, CollectionRef _get_collection(const coll_t& cid); void _queue_reap_collection(CollectionRef& c); void _reap_collections(); + void _update_cache_logger(); void _assign_nid(TransContext *txc, OnodeRef o); uint64_t _assign_blobid(TransContext *txc); diff --git a/src/test/objectstore/test_bluestore_types.cc b/src/test/objectstore/test_bluestore_types.cc index f230b7c0e038d..a4085155bebf9 100644 --- a/src/test/objectstore/test_bluestore_types.cc +++ b/src/test/objectstore/test_bluestore_types.cc @@ -13,6 +13,19 @@ #include +#define _STR(x) #x +#define STRINGIFY(x) _STR(x) + +TEST(bluestore, sizeof) { +#define P(t) cout << STRINGIFY(t) << "\t" << sizeof(t) << std::endl + P(BlueStore::Onode); + P(BlueStore::Extent); + P(BlueStore::Blob); + P(BlueStore::SharedBlob); + P(bluestore_extent_ref_map_t); + P(bluestore_extent_ref_map_t::record_t); +} + TEST(bluestore_extent_ref_map_t, add) { bluestore_extent_ref_map_t m; @@ -669,8 +682,10 @@ TEST(Blob, put_ref) TEST(ExtentMap, find_lextent) { + BlueStore::LRUCache cache; BlueStore::ExtentMap em(nullptr); BlueStore::BlobRef br(new BlueStore::Blob); + br->shared_blob = new BlueStore::SharedBlob(-1, string(), &cache); ASSERT_EQ(em.extent_map.end(), em.find_lextent(0)); ASSERT_EQ(em.extent_map.end(), em.find_lextent(100)); @@ -713,8 +728,10 @@ TEST(ExtentMap, find_lextent) TEST(ExtentMap, seek_lextent) { + BlueStore::LRUCache cache; BlueStore::ExtentMap em(nullptr); BlueStore::BlobRef br(new BlueStore::Blob); + br->shared_blob = new BlueStore::SharedBlob(-1, string(), &cache); ASSERT_EQ(em.extent_map.end(), em.seek_lextent(0)); ASSERT_EQ(em.extent_map.end(), em.seek_lextent(100)); @@ -757,8 +774,10 @@ TEST(ExtentMap, seek_lextent) TEST(ExtentMap, has_any_lextents) { + BlueStore::LRUCache cache; BlueStore::ExtentMap em(nullptr); BlueStore::BlobRef b(new BlueStore::Blob); + 
b->shared_blob = new BlueStore::SharedBlob(-1, string(), &cache); ASSERT_FALSE(em.has_any_lextents(0, 0)); ASSERT_FALSE(em.has_any_lextents(0, 1000)); @@ -799,10 +818,14 @@ TEST(ExtentMap, has_any_lextents) TEST(ExtentMap, compress_extent_map) { + BlueStore::LRUCache cache; BlueStore::ExtentMap em(nullptr); BlueStore::BlobRef b1(new BlueStore::Blob); BlueStore::BlobRef b2(new BlueStore::Blob); BlueStore::BlobRef b3(new BlueStore::Blob); + b1->shared_blob = new BlueStore::SharedBlob(-1, string(), &cache); + b2->shared_blob = new BlueStore::SharedBlob(-1, string(), &cache); + b3->shared_blob = new BlueStore::SharedBlob(-1, string(), &cache); em.extent_map.insert(*new BlueStore::Extent(0, 0, 100, 1, b1)); em.extent_map.insert(*new BlueStore::Extent(100, 0, 100, 1, b2));