mds: consider client's flushing caps when choosing lock states
A client may flush and drop caps at the same time. If the client
needs to send a cap reconnect before those caps are flushed, the
issued caps in the cap reconnect do not include the flushing caps.
When choosing lock states, the MDS considers only the issued caps
from the cap reconnect, so it may choose the wrong states.

Fixes: ceph#11482
Signed-off-by: Yan, Zheng <zyan@redhat.com>
(cherry picked from commit ce9a596)
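
To make the failure mode concrete, here is a minimal sketch (the cap
constants are real ceph_fs.h definitions; the scenario values are assumed
for illustration):

    // Suppose the client held Fs|Fw and is flushing Fw back when the MDS
    // restarts.  The cap reconnect reports only what is still issued:
    int issued_in_reconnect = CEPH_CAP_FILE_SHARED;  // Fs; Fw already dropped
    int dirty_caps          = CEPH_CAP_FILE_WR;      // Fw, flush in flight

    // Before this commit, lock states were chosen from issued_in_reconnect
    // alone, which can pick a state that cannot accept the in-flight Fw
    // flush.  After this commit, the union is used instead:
    int effective_issued = issued_in_reconnect | dirty_caps;  // Fs|Fw
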
ukernel committed Dec 1, 2015
1 parent 3fcf905 commit 6c68971
Showing 5 changed files with 23 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/mds/CInode.cc
@@ -2694,9 +2694,9 @@ void CInode::choose_lock_state(SimpleLock *lock, int allissued)
   }
 }
 
-void CInode::choose_lock_states()
+void CInode::choose_lock_states(int dirty_caps)
 {
-  int issued = get_caps_issued();
+  int issued = get_caps_issued() | dirty_caps;
   if (is_auth() && (issued & (CEPH_CAP_ANY_EXCL|CEPH_CAP_ANY_WR)) &&
       choose_ideal_loner() >= 0)
     try_set_loner();
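
With dirty_caps ORed into issued, an inode whose only writer is still
flushing continues to satisfy the exclusive/write test above, so loner
selection is not lost during reconnect. An illustrative trace, assuming the
client reports nothing issued but has Fw dirty:

    int issued = get_caps_issued() /* 0 */ | dirty_caps /* CEPH_CAP_FILE_WR */;
    // CEPH_CAP_FILE_WR is part of CEPH_CAP_ANY_WR, so the test
    // (issued & (CEPH_CAP_ANY_EXCL|CEPH_CAP_ANY_WR)) still holds.
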
2 changes: 1 addition & 1 deletion src/mds/CInode.h
@@ -812,7 +812,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase {
 
   // choose new lock state during recovery, based on issued caps
   void choose_lock_state(SimpleLock *lock, int allissued);
-  void choose_lock_states();
+  void choose_lock_states(int dirty_caps);
 
   int count_nonstale_caps() {
     int n = 0;
5 changes: 5 additions & 0 deletions src/mds/Locker.cc
@@ -2463,6 +2463,11 @@ void Locker::handle_client_caps(MClientCaps *m)
           << " op " << ceph_cap_op_name(m->get_op()) << dendl;
 
   if (!mds->is_clientreplay() && !mds->is_active() && !mds->is_stopping()) {
+    if (mds->is_reconnect() &&
+        m->get_dirty() && m->get_client_tid() > 0 &&
+        session->have_completed_flush(m->get_client_tid())) {
+      mdcache->set_reconnect_dirty_caps(m->get_ino(), m->get_dirty());
+    }
    mds->wait_for_replay(new C_MDS_RetryMessage(mds, m));
    return;
   }
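
A condensed view of the data flow this hunk sets up (the names are from this
commit; the sequencing across MDS states is an assumption of the sketch):

    // reconnect: the client resends a cap flush whose tid the recovered
    // session already lists as completed, so its dirty bits are stashed
    mdcache->set_reconnect_dirty_caps(m->get_ino(), m->get_dirty());

    // rejoin: the stashed bits are folded back in as if still issued
    in->choose_lock_states(dirty_caps);  // uses get_caps_issued() | dirty_caps
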
13 changes: 11 additions & 2 deletions src/mds/MDCache.cc
@@ -5374,7 +5374,11 @@ void MDCache::choose_lock_states_and_reconnect_caps()
     if (in->is_auth() && !in->is_base() && in->inode.is_dirty_rstat())
       in->mark_dirty_rstat();
 
-    in->choose_lock_states();
+    int dirty_caps = 0;
+    map<inodeno_t, int>::iterator it = cap_imports_dirty.find(in->ino());
+    if (it != cap_imports_dirty.end())
+      dirty_caps = it->second;
+    in->choose_lock_states(dirty_caps);
     dout(15) << " chose lock states on " << *in << dendl;
 
     SnapRealm *realm = in->find_snaprealm();
@@ -5520,6 +5524,7 @@ void MDCache::export_remaining_imported_caps()
   }
 
   cap_imports.clear();
+  cap_imports_dirty.clear();
 
   if (warn_str.peek() != EOF) {
     mds->clog->warn() << "failed to reconnect caps for missing inodes:" << "\n";
@@ -5542,7 +5547,11 @@ void MDCache::try_reconnect_cap(CInode *in, Session *session)
     if (in->is_replicated()) {
       mds->locker->try_eval(in, CEPH_CAP_LOCKS);
     } else {
-      in->choose_lock_states();
+      int dirty_caps = 0;
+      map<inodeno_t, int>::iterator it = cap_imports_dirty.find(in->ino());
+      if (it != cap_imports_dirty.end())
+        dirty_caps = it->second;
+      in->choose_lock_states(dirty_caps);
       dout(15) << " chose lock states on " << *in << dendl;
     }
   }
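
The cap_imports_dirty lookup is duplicated in both call sites above; a
hypothetical helper (not part of this commit) would factor it out:

    // hypothetical accessor, shown only to document the lookup semantics
    int MDCache::get_reconnect_dirty_caps(inodeno_t ino)
    {
      map<inodeno_t, int>::iterator it = cap_imports_dirty.find(ino);
      return (it != cap_imports_dirty.end()) ? it->second : 0;
    }
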
4 changes: 4 additions & 0 deletions src/mds/MDCache.h
@@ -492,6 +492,7 @@ class MDCache {
 
   map<inodeno_t,map<client_t,map<mds_rank_t,ceph_mds_cap_reconnect> > > cap_imports;  // ino -> client -> frommds -> capex
   map<inodeno_t,filepath> cap_import_paths;
+  map<inodeno_t,int> cap_imports_dirty;
   set<inodeno_t> cap_imports_missing;
   int cap_imports_num_opening;
 
@@ -549,6 +550,9 @@ class MDCache {
     assert(cap_imports[ino][client].size() == 1);
     cap_imports.erase(ino);
   }
+  void set_reconnect_dirty_caps(inodeno_t ino, int dirty) {
+    cap_imports_dirty[ino] |= dirty;
+  }
 
   // [reconnect/rejoin caps]
   map<CInode*,map<client_t, inodeno_t> > reconnected_caps;  // inode -> client -> realmino
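
Because set_reconnect_dirty_caps() ORs into the map entry, dirty bits from
multiple resent flushes for the same inode accumulate (illustrative values):

    mdcache->set_reconnect_dirty_caps(ino, CEPH_CAP_FILE_WR);    // first flush
    mdcache->set_reconnect_dirty_caps(ino, CEPH_CAP_FILE_EXCL);  // second flush
    // cap_imports_dirty[ino] == CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL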
