os/bluestore: make cache settings process-wide #11295

Merged
10 commits merged on Oct 4, 2016
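
For context: the point of this PR is that bluestore_onode_cache_size and bluestore_buffer_cache_size now bound the whole OSD process rather than each cache shard, so every shard is trimmed to its share of the configured value (see Collection::trim_cache() in the BlueStore.cc diff below). The following standalone sketch only illustrates that arithmetic; the variable names and example values are assumptions, not the real config plumbing.

#include <cstdint>
#include <iostream>

int main() {
  // Assumed, illustrative values; the real code reads
  // g_conf->bluestore_onode_cache_size, g_conf->bluestore_buffer_cache_size
  // and store->cache_shards.size().
  const uint64_t onode_cache_size  = 4096;          // process-wide onode count limit
  const uint64_t buffer_cache_size = 512ull << 20;  // process-wide buffer bytes limit
  const uint64_t num_shards        = 8;             // number of cache shards

  uint64_t onode_total = 0, buffer_total = 0;
  for (uint64_t i = 0; i < num_shards; ++i) {
    // Each shard is trimmed to 1/N of the process-wide limits, as in
    // Collection::trim_cache().
    const uint64_t onode_max  = onode_cache_size / num_shards;
    const uint64_t buffer_max = buffer_cache_size / num_shards;
    onode_total  += onode_max;
    buffer_total += buffer_max;
  }

  // The per-shard limits sum back to (at most) the configured process-wide values.
  std::cout << "onode limit per shard: "  << onode_cache_size / num_shards
            << ", total across shards: "  << onode_total  << "\n"
            << "buffer limit per shard: " << buffer_cache_size / num_shards
            << ", total across shards: "  << buffer_total << std::endl;
  return 0;
}
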
do_cmake.sh: 4 additions & 0 deletions
@@ -8,9 +8,13 @@ mkdir build
cd build
cmake $@ ..

# minimal config to find plugins
cat <<EOF > ceph.conf
plugin dir = lib
erasure code dir = lib
EOF

# give vstart a (hopefully) unique mon port to start with
echo $(( RANDOM % 1000 + 40000 )) > .ceph_port

echo done.
src/os/bluestore/BlueStore.cc: 49 additions & 28 deletions
@@ -1695,7 +1695,7 @@ void BlueStore::ExtentMap::decode_some(bufferlist& bl)
uint64_t prev_len = 0;
unsigned n = 0;
while (!p.end()) {
Extent *le = new Extent();
Extent *le = new Extent(onode->c->cache);
uint64_t blobid;
small_decode_varint(blobid, p);
if ((blobid & BLOBID_FLAG_CONTIGUOUS) == 0) {
@@ -2064,7 +2064,8 @@ BlueStore::Collection::Collection(BlueStore *ns, Cache *c, coll_t cid)
exists(true),
// size the shared blob hash table as a ratio of the onode cache size.
shared_blob_set(MAX(16,
g_conf->bluestore_onode_cache_size *
g_conf->bluestore_onode_cache_size /
store->cache_shards.size() *
g_conf->bluestore_shared_blob_hash_table_size_ratio)),
onode_map(c)
{
@@ -2190,6 +2191,13 @@ BlueStore::OnodeRef BlueStore::Collection::get_onode(
return onode_map.add(oid, o);
}

void BlueStore::Collection::trim_cache()
{
cache->trim(
g_conf->bluestore_onode_cache_size / store->cache_shards.size(),
g_conf->bluestore_buffer_cache_size / store->cache_shards.size());
}



// =======================================================
@@ -2414,7 +2422,7 @@ void BlueStore::_init_logger()
"Sum for wal write op");
b.add_u64(l_bluestore_wal_write_bytes, "wal_write_bytes",
"Sum for wal write bytes");
b.add_u64(l_bluestore_write_penalty_read_ops, " write_penalty_read_ops",
b.add_u64(l_bluestore_write_penalty_read_ops, "write_penalty_read_ops",
"Sum for write penalty read ops");
b.add_u64(l_bluestore_allocated, "bluestore_allocated",
"Sum for allocated bytes");
@@ -2427,10 +2435,20 @@ void BlueStore::_init_logger()
b.add_u64(l_bluestore_compressed_original, "bluestore_compressed_original",
"Sum for original bytes that were compressed");

b.add_u64(l_bluestore_onodes, "bluestore_onodes",
"Number of onodes in cache");
b.add_u64(l_bluestore_onode_hits, "bluestore_onode_hits",
"Sum for onode-lookups hit in the cache");
b.add_u64(l_bluestore_onode_misses, "bluestore_onode_misses",
"Sum for onode-lookups missed in the cache");
b.add_u64(l_bluestore_extents, "bluestore_extents",
"Number of extents in cache");
b.add_u64(l_bluestore_blobs, "bluestore_blobs",
"Number of blobs in cache");
b.add_u64(l_bluestore_buffers, "bluestore_buffers",
"Number of buffers in cache");
b.add_u64(l_bluestore_buffer_bytes, "bluestore_buffer_bytes",
"Number of buffer bytes in cache");
b.add_u64(l_bluestore_buffer_hit_bytes, "bluestore_buffer_hit_bytes",
"Sum for bytes of read hit in the cache");
b.add_u64(l_bluestore_buffer_miss_bytes, "bluestore_buffer_miss_bytes",
@@ -4508,6 +4526,24 @@ void BlueStore::_reap_collections()
}
}

void BlueStore::_update_cache_logger()
{
uint64_t num_onodes = 0;
uint64_t num_extents = 0;
uint64_t num_blobs = 0;
uint64_t num_buffers = 0;
uint64_t num_buffer_bytes = 0;
for (auto c : cache_shards) {
c->add_stats(&num_onodes, &num_extents, &num_blobs,
&num_buffers, &num_buffer_bytes);
}
logger->set(l_bluestore_onodes, num_onodes);
logger->set(l_bluestore_extents, num_extents);
logger->set(l_bluestore_blobs, num_blobs);
logger->set(l_bluestore_buffers, num_buffers);
logger->set(l_bluestore_buffer_bytes, num_buffer_bytes);
}

// ---------------
// read operations

@@ -4540,10 +4576,7 @@ bool BlueStore::exists(CollectionHandle &c_, const ghobject_t& oid)
r = false;
}

c->cache->trim(
g_conf->bluestore_onode_cache_size,
g_conf->bluestore_buffer_cache_size);

c->trim_cache();
return r;
}

@@ -4581,9 +4614,7 @@ int BlueStore::stat(
st->st_nlink = 1;
}

c->cache->trim(
g_conf->bluestore_onode_cache_size,
g_conf->bluestore_buffer_cache_size);
c->trim_cache();
int r = 0;
if (_debug_mdata_eio(oid)) {
r = -EIO;
@@ -4643,9 +4674,7 @@ int BlueStore::read(

out:
assert(allow_eio || r != -EIO);
c->cache->trim(
g_conf->bluestore_onode_cache_size,
g_conf->bluestore_buffer_cache_size);
c->trim_cache();
if (r == 0 && _debug_data_eio(oid)) {
r = -EIO;
derr << __func__ << " " << c->cid << " " << oid << " INJECT EIO" << dendl;
@@ -5040,9 +5069,7 @@ int BlueStore::fiemap(
}

out:
c->cache->trim(
g_conf->bluestore_onode_cache_size,
g_conf->bluestore_buffer_cache_size);
c->trim_cache();
::encode(m, bl);
dout(20) << __func__ << " 0x" << std::hex << offset << "~" << length
<< " size = 0x(" << m << ")" << std::dec << dendl;
@@ -5091,9 +5118,7 @@ int BlueStore::getattr(
r = 0;
}
out:
c->cache->trim(
g_conf->bluestore_onode_cache_size,
g_conf->bluestore_buffer_cache_size);
c->trim_cache();
if (r == 0 && _debug_mdata_eio(oid)) {
r = -EIO;
derr << __func__ << " " << c->cid << " " << oid << " INJECT EIO" << dendl;
@@ -5139,9 +5164,7 @@ int BlueStore::getattrs(
}

out:
c->cache->trim(
g_conf->bluestore_onode_cache_size,
g_conf->bluestore_buffer_cache_size);
c->trim_cache();
if (r == 0 && _debug_mdata_eio(oid)) {
r = -EIO;
derr << __func__ << " " << c->cid << " " << oid << " INJECT EIO" << dendl;
@@ -5322,9 +5345,7 @@ int BlueStore::collection_list(
}

out:
c->cache->trim(
g_conf->bluestore_onode_cache_size,
g_conf->bluestore_buffer_cache_size);
c->trim_cache();
dout(10) << __func__ << " " << c->cid
<< " start " << start << " end " << end << " max " << max
<< " = " << r << ", ls.size() = " << ls->size()
@@ -6172,9 +6193,7 @@ void BlueStore::_osr_reap_done(OpSequencer *osr)
}

if (c) {
c->cache->trim(
g_conf->bluestore_onode_cache_size,
g_conf->bluestore_buffer_cache_size);
c->trim_cache();
}
}

@@ -6357,6 +6376,8 @@ void BlueStore::_kv_sync_thread()
// this is as good a place as any ...
_reap_collections();

_update_cache_logger();

if (bluefs) {
if (!bluefs_gift_extents.empty()) {
_commit_bluefs_freespace(bluefs_gift_extents);
src/os/bluestore/BlueStore.h: 79 additions & 4 deletions
@@ -72,8 +72,13 @@ enum {
l_bluestore_compressed,
l_bluestore_compressed_allocated,
l_bluestore_compressed_original,
l_bluestore_onodes,
l_bluestore_onode_hits,
l_bluestore_onode_misses,
l_bluestore_extents,
l_bluestore_blobs,
l_bluestore_buffers,
l_bluestore_buffer_bytes,
l_bluestore_buffer_hit_bytes,
l_bluestore_buffer_miss_bytes,
l_bluestore_write_big,
@@ -204,10 +209,17 @@ class BlueStore : public ObjectStore,
Cache *cache;
map<uint64_t, state_list_t> writing_map;

BufferSpace(Cache *c) : cache(c) {}
BufferSpace(Cache *c) : cache(c) {
if (cache) {
cache->add_blob();
}
}
~BufferSpace() {
assert(buffer_map.empty());
assert(writing_map.empty());
if (cache) {
cache->rm_blob();
}
}

void _add_buffer(Buffer *b, int level, Buffer *near) {
@@ -509,10 +521,24 @@ class BlueStore : public ObjectStore,
uint8_t blob_depth; /// blob overlapping count
BlobRef blob; ///< the blob with our data

explicit Extent() {}
explicit Extent(uint32_t lo) : logical_offset(lo) {}
/// ctor for lookup only
explicit Extent(uint32_t lo) : logical_offset(lo) { }
/// ctor for delayed initialization (see decode_some())
explicit Extent(Cache *cache) {
cache->add_extent();
}

@ifed01 (Contributor) commented on Oct 4, 2016:

IMHO a specific blob assignment method that also increments the cache counter(s) is more transparent and less error-prone, e.g.

void Extent::assign_blob(BlobRef b) {
  if (blob) blob->shared_blob->bc.cache->rm_extent();
  blob = b;
  if (blob) blob->shared_blob->bc.cache->add_extent();
}
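
A self-contained sketch of the pattern being suggested, under the assumption that the setter owns the counter bookkeeping so callers cannot forget it; Cache, Blob, BlobRef and Extent below are simplified stand-ins, not the real BlueStore types:

#include <cassert>
#include <cstdint>
#include <memory>

// Simplified stand-ins for the real BlueStore types (illustrative only).
struct Cache {
  uint64_t num_extents = 0;
  void add_extent() { ++num_extents; }
  void rm_extent()  { assert(num_extents > 0); --num_extents; }
};

struct Blob {
  Cache *cache = nullptr;
};
using BlobRef = std::shared_ptr<Blob>;

struct Extent {
  BlobRef blob;

  // The suggested single assignment point: swap the blob and keep the
  // per-cache extent counter consistent in one place.
  void assign_blob(BlobRef b) {
    if (blob)
      blob->cache->rm_extent();
    blob = std::move(b);
    if (blob)
      blob->cache->add_extent();
  }

  ~Extent() { assign_blob(nullptr); }  // releasing the blob also decrements
};

int main() {
  Cache cache;
  auto b1 = std::make_shared<Blob>();
  b1->cache = &cache;
  auto b2 = std::make_shared<Blob>();
  b2->cache = &cache;

  {
    Extent e;
    e.assign_blob(b1);                 // counter goes to 1
    e.assign_blob(b2);                 // still 1: old blob released, new one counted
    assert(cache.num_extents == 1);
  }                                    // Extent destroyed, counter back to 0

  assert(cache.num_extents == 0);
  return 0;
}

With that shape the counter moves with the blob assignment rather than with Extent construction, which is what the comment argues is the more transparent place for it.
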
/// ctor for general usage
Extent(uint32_t lo, uint32_t o, uint32_t l, uint8_t bd, BlobRef& b)
: logical_offset(lo), blob_offset(o), length(l), blob_depth(bd), blob(b){}
: logical_offset(lo), blob_offset(o), length(l), blob_depth(bd), blob(b) {
if (blob) {
blob->shared_blob->bc.cache->add_extent();
}
}
~Extent() {
if (blob) {
blob->shared_blob->bc.cache->rm_extent();
}
}

// comparators for intrusive_set
friend bool operator<(const Extent &a, const Extent &b) {
@@ -699,6 +725,9 @@ class BlueStore : public ObjectStore,
PerfCounters *logger;
std::recursive_mutex lock; ///< protect lru and other structures

std::atomic<uint64_t> num_extents = {0};
std::atomic<uint64_t> num_blobs = {0};

static Cache *create(string type, PerfCounters *logger);

virtual ~Cache() {}
@@ -712,8 +741,27 @@ class BlueStore : public ObjectStore,
virtual void _adjust_buffer_size(Buffer *b, int64_t delta) = 0;
virtual void _touch_buffer(Buffer *b) = 0;

void add_extent() {
++num_extents;
}
void rm_extent() {
--num_extents;
}

void add_blob() {
++num_blobs;
}
void rm_blob() {
--num_blobs;
}

virtual void trim(uint64_t onode_max, uint64_t buffer_max) = 0;

virtual void add_stats(uint64_t *onodes, uint64_t *extents,
uint64_t *blobs,
uint64_t *buffers,
uint64_t *bytes) = 0;

#ifdef DEBUG_CACHE
virtual void _audit(const char *s) = 0;
#else
@@ -785,6 +833,18 @@ class BlueStore : public ObjectStore,

void trim(uint64_t onode_max, uint64_t buffer_max) override;

void add_stats(uint64_t *onodes, uint64_t *extents,
uint64_t *blobs,
uint64_t *buffers,
uint64_t *bytes) override {
std::lock_guard<std::recursive_mutex> l(lock);
*onodes += onode_lru.size();
*extents += num_extents;
*blobs += num_blobs;
*buffers += buffer_lru.size();
*bytes += buffer_size;
}

#ifdef DEBUG_CACHE
void _audit(const char *s) override;
#endif
@@ -860,6 +920,18 @@ class BlueStore : public ObjectStore,

void trim(uint64_t onode_max, uint64_t buffer_max) override;

void add_stats(uint64_t *onodes, uint64_t *extents,
uint64_t *blobs,
uint64_t *buffers,
uint64_t *bytes) override {
std::lock_guard<std::recursive_mutex> l(lock);
*onodes += onode_lru.size();
*extents += num_extents;
*blobs += num_blobs;
*buffers += buffer_hot.size() + buffer_warm_in.size();
*bytes += buffer_bytes;
}

#ifdef DEBUG_CACHE
void _audit(const char *s) override;
#endif
@@ -940,6 +1012,8 @@ class BlueStore : public ObjectStore,
return false;
}

void trim_cache();

Collection(BlueStore *ns, Cache *ca, coll_t c);
};
typedef boost::intrusive_ptr<Collection> CollectionRef;
@@ -1414,6 +1488,7 @@ class BlueStore : public ObjectStore,
CollectionRef _get_collection(const coll_t& cid);
void _queue_reap_collection(CollectionRef& c);
void _reap_collections();
void _update_cache_logger();

void _assign_nid(TransContext *txc, OnodeRef o);
uint64_t _assign_blobid(TransContext *txc);