Skip to content

Commit

Permalink
mds: trim null dentries proactively
Browse files Browse the repository at this point in the history
Instead of leaving null dentries (e.g. those left
behind by unlinks) in the cache until they
fall out of the LRU, actively push them
to the bottom of the LRU and then have trim()
expire every null dentry it finds at the bottom,
even if the cache is not yet oversized.

This fixes the case where standby replay daemons
would otherwise accumulate a cache full of
null dentries resulting from unlinks, and it
makes the behaviour of active daemons more
deterministic.

Fixes: http://tracker.ceph.com/issues/16919
Signed-off-by: John Spray <john.spray@redhat.com>
  • Loading branch information
John Spray committed Aug 9, 2016
1 parent df32a8b commit c419878
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 5 deletions.
15 changes: 12 additions & 3 deletions src/mds/MDCache.cc
Expand Up @@ -6350,10 +6350,19 @@ bool MDCache::trim(int max, int count)
bool is_standby_replay = mds->is_standby_replay();
int unexpirable = 0;
list<CDentry*> unexpirables;
// trim dentries from the LRU
while (lru.lru_get_size() + unexpirable > (unsigned)max) {

// trim dentries from the LRU: only enough to satisfy `max`,
// unless we see null dentries at the bottom of the LRU,
// in which case trim all those.
bool trimming_nulls = true;
while (trimming_nulls || lru.lru_get_size() + unexpirable > (unsigned)max) {
CDentry *dn = static_cast<CDentry*>(lru.lru_expire());
if (!dn) break;
if (!dn) {
break;
}
if (!dn->get_linkage()->is_null()) {
trimming_nulls = false;
}
if ((is_standby_replay && dn->get_linkage()->inode &&
dn->get_linkage()->inode->item_open_file.is_on_list()) ||
trim_dentry(dn, expiremap)) {
Expand Down
1 change: 1 addition & 0 deletions src/mds/MDLog.cc
Expand Up @@ -850,6 +850,7 @@ void MDLog::replay(MDSInternalContextBase *c)
// empty?
if (journaler->get_read_pos() == journaler->get_write_pos()) {
dout(10) << "replay - journal empty, done." << dendl;
mds->mdcache->trim(-1);
if (c) {
c->complete(0);
}
Expand Down
20 changes: 18 additions & 2 deletions src/mds/journal.cc
Expand Up @@ -1276,7 +1276,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
dn = dir->add_null_dentry(p->dn, p->dnfirst, p->dnlast);
dn->set_version(p->dnv);
if (p->is_dirty()) dn->_mark_dirty(logseg);
dout(10) << "EMetaBlob.replay added " << *dn << dendl;
dout(10) << "EMetaBlob.replay added (full) " << *dn << dendl;
} else {
dn->set_version(p->dnv);
if (p->is_dirty()) dn->_mark_dirty(logseg);
Expand All @@ -1302,6 +1302,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
mds->clog->warn(ss);
}
dir->unlink_inode(dn);
mds->mdcache->touch_dentry_bottom(dn);
}
if (unlinked.count(in))
linked.insert(in);
Expand All @@ -1313,7 +1314,9 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
if (dn->get_linkage()->get_inode() != in && in->get_parent_dn()) {
dout(10) << "EMetaBlob.replay unlinking " << *in << dendl;
unlinked[in] = in->get_parent_dir();
CDentry *unlinked_dn = in->get_parent_dn();
in->get_parent_dir()->unlink_inode(in->get_parent_dn());
mds->mdcache->touch_dentry_bottom(unlinked_dn);
}
if (dn->get_linkage()->get_inode() != in) {
if (!dn->get_linkage()->is_null()) { // note: might be remote. as with stray reintegration.
Expand All @@ -1326,6 +1329,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
mds->clog->warn(ss);
}
dir->unlink_inode(dn);
mds->mdcache->touch_dentry_bottom(dn);
}
if (unlinked.count(in))
linked.insert(in);
Expand Down Expand Up @@ -1371,6 +1375,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
dout(0) << ss.str() << dendl;
}
dir->unlink_inode(dn);
mds->mdcache->touch_dentry_bottom(dn);
}
dir->link_remote_inode(dn, p->ino, p->d_type);
dn->set_version(p->dnv);
Expand All @@ -1392,7 +1397,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
dn = dir->add_null_dentry(p->dn, p->dnfirst, p->dnlast);
dn->set_version(p->dnv);
if (p->dirty) dn->_mark_dirty(logseg);
dout(10) << "EMetaBlob.replay added " << *dn << dendl;
dout(10) << "EMetaBlob.replay added (nullbit) " << *dn << dendl;
} else {
dn->first = p->dnfirst;
if (!dn->get_linkage()->is_null()) {
Expand All @@ -1405,6 +1410,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
if (dn->get_linkage()->is_primary())
unlinked[in] = dir;
dir->unlink_inode(dn);
mds->mdcache->touch_dentry_bottom(dn);
}
}
dn->set_version(p->dnv);
Expand All @@ -1415,6 +1421,10 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
olddir = dir;
if (lump.is_importing())
dn->state_set(CDentry::STATE_AUTH);

// Make null dentries the first things we trim
dout(10) << "EMetaBlob.replay pushing to bottom of lru " << *dn << dendl;
mds->mdcache->touch_dentry_bottom(dn);
}
}

Expand Down Expand Up @@ -1622,7 +1632,13 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
CInode *in = mds->mdcache->get_inode(*p);
if (in) {
dout(10) << "EMetaBlob.replay destroyed " << *p << ", dropping " << *in << dendl;
CDentry *parent = in->get_parent_dn();
mds->mdcache->remove_inode(in);
if (parent) {
dout(10) << "EMetaBlob.replay unlinked from dentry " << *parent << dendl;
assert(parent->get_linkage()->is_null());
mds->mdcache->touch_dentry_bottom(parent);
}
} else {
dout(10) << "EMetaBlob.replay destroyed " << *p << ", not in cache" << dendl;
}
Expand Down

0 comments on commit c419878

Please sign in to comment.