From 1e0c47cc530698863d57fdedeb5438521c3643e1 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 19 Sep 2016 15:22:01 +0100 Subject: [PATCH 1/2] mds: use a random nonce in Messenger The MDS is a client to the OSDs, and responds to blacklists by respawning itself. Usually respawns of a daemonized process result in a PID change, but it's not guaranteed, and it's definitely not the case when someone runs in foreground (e.g. teuthology). Using a random nonce makes sure we won't match against an existing blacklist entry from a failed instance of an MDS daemon with the same name as us. Related to: http://tracker.ceph.com/issues/17236 Signed-off-by: John Spray (cherry picked from commit 5ba612882750dae6f0b057c660cd283293a18a3f) Conflicts: src/ceph_mds.cc : Messenger::create() prototype is different --- src/ceph_mds.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ceph_mds.cc b/src/ceph_mds.cc index efe22e6b995ff..58bda1b5422ac 100644 --- a/src/ceph_mds.cc +++ b/src/ceph_mds.cc @@ -136,9 +136,12 @@ int main(int argc, const char **argv) "MDS names may not start with a numeric digit." << dendl; } + uint64_t nonce = 0; + get_random_bytes((char*)&nonce, sizeof(nonce)); + Messenger *msgr = Messenger::create(g_ceph_context, g_conf->ms_type, entity_name_t::MDS(-1), "mds", - getpid()); + nonce); if (!msgr) exit(1); msgr->set_cluster_protocol(CEPH_MDS_PROTOCOL); From 2ee3e54496567061c46e804ddc2d5f49096a5d57 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 19 Sep 2016 17:34:26 +0100 Subject: [PATCH 2/2] mds: handle blacklisting during journal recovery EBLACKLISTED was being incorrectly handled as an indication of metadata damage. Fixes: http://tracker.ceph.com/issues/17236 Signed-off-by: John Spray (cherry picked from commit 19bb8c0df9b48ebbccfd1913126bb48b6337319e) --- src/mds/MDLog.cc | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 795ffb3f5b291..260e874705640 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -910,6 +910,10 @@ void MDLog::_recovery_thread(MDSInternalContextBase *completion) int write_result = jp.save(mds->objecter); // Nothing graceful we can do for this assert(write_result >= 0); + } else if (read_result == -EBLACKLISTED) { + derr << "Blacklisted during JournalPointer read! Respawning..." << dendl; + mds->respawn(); + assert(0); // Should be unreachable because respawn calls execv } else if (read_result != 0) { mds->clog->error() << "failed to read JournalPointer: " << read_result << " (" << cpp_strerror(read_result) << ")"; @@ -936,7 +940,11 @@ void MDLog::_recovery_thread(MDSInternalContextBase *completion) C_SaferCond recover_wait; back.recover(&recover_wait); int recovery_result = recover_wait.wait(); - if (recovery_result != 0) { + if (recovery_result == -EBLACKLISTED) { + derr << "Blacklisted during journal recovery! Respawning..." << dendl; + mds->respawn(); + assert(0); // Should be unreachable because respawn calls execv + } else if (recovery_result != 0) { // Journaler.recover succeeds if no journal objects are present: an error // means something worse like a corrupt header, which we can't handle here. mds->clog->error() << "Error recovering journal " << jp.front << ": " @@ -979,7 +987,11 @@ void MDLog::_recovery_thread(MDSInternalContextBase *completion) int recovery_result = recover_wait.wait(); dout(4) << "Journal " << jp.front << " recovered." << dendl; - if (recovery_result != 0) { + if (recovery_result == -EBLACKLISTED) { + derr << "Blacklisted during journal recovery! Respawning..." << dendl; + mds->respawn(); + assert(0); // Should be unreachable because respawn calls execv + } else if (recovery_result != 0) { mds->clog->error() << "Error recovering journal " << jp.front << ": " << cpp_strerror(recovery_result); mds->damaged_unlocked();