Skip to content

Commit

Permalink
Merge pull request #11862 from dachary/wip-17582-jewel
Browse files Browse the repository at this point in the history
jewel: monitor assertion failure when deactivating mds in (invalid) fscid 0

Reviewed-by: John Spray <john.spray@redhat.com>
  • Loading branch information
Loic Dachary committed Nov 14, 2016
2 parents 474a4bf + 804c46c commit 1bc2a7e
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 81 deletions.
79 changes: 27 additions & 52 deletions src/mds/FSMap.cc
Expand Up @@ -499,7 +499,7 @@ int FSMap::parse_filesystem(
std::string ns_err;
fs_cluster_id_t fscid = strict_strtol(ns_str.c_str(), 10, &ns_err);
if (!ns_err.empty() || filesystems.count(fscid) == 0) {
for (auto fs : filesystems) {
for (auto &fs : filesystems) {
if (fs.second->mds_map.fs_name == ns_str) {
*result = std::const_pointer_cast<const Filesystem>(fs.second);
return 0;
Expand Down Expand Up @@ -732,7 +732,7 @@ void FSMap::erase(mds_gid_t who, epoch_t blacklist_epoch)
standby_daemons.erase(who);
standby_epochs.erase(who);
} else {
auto fs = filesystems.at(mds_roles.at(who));
auto &fs = filesystems.at(mds_roles.at(who));
const auto &info = fs->mds_map.mds_info.at(who);
if (info.state != MDSMap::STATE_STANDBY_REPLAY) {
if (info.state == MDSMap::STATE_CREATING) {
Expand Down Expand Up @@ -824,66 +824,41 @@ int FSMap::parse_role(
mds_role_t *role,
std::ostream &ss) const
{
auto colon_pos = role_str.find(":");

if (colon_pos != std::string::npos && colon_pos != role_str.size()) {
auto fs_part = role_str.substr(0, colon_pos);
auto rank_part = role_str.substr(colon_pos + 1);

std::string err;
fs_cluster_id_t fs_id = FS_CLUSTER_ID_NONE;
long fs_id_i = strict_strtol(fs_part.c_str(), 10, &err);
if (fs_id_i < 0 || !err.empty()) {
// Try resolving as name
auto fs = get_filesystem(fs_part);
if (fs == nullptr) {
ss << "Unknown filesystem name '" << fs_part << "'";
return -EINVAL;
} else {
fs_id = fs->fscid;
}
} else {
fs_id = fs_id_i;
}

mds_rank_t rank;
long rank_i = strict_strtol(rank_part.c_str(), 10, &err);
if (rank_i < 0 || !err.empty()) {
ss << "Invalid rank '" << rank_part << "'";
return -EINVAL;
} else {
rank = rank_i;
}

*role = {fs_id, rank};
} else {
std::string err;
long who_i = strict_strtol(role_str.c_str(), 10, &err);
if (who_i < 0 || !err.empty()) {
ss << "Invalid rank '" << role_str << "'";
return -EINVAL;
}

size_t colon_pos = role_str.find(":");
size_t rank_pos;
std::shared_ptr<const Filesystem> fs;
if (colon_pos == std::string::npos) {
if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
ss << "No filesystem selected";
return -ENOENT;
} else {
*role = mds_role_t(legacy_client_fscid, who_i);
}
fs = get_filesystem(legacy_client_fscid);
rank_pos = 0;
} else {
if (parse_filesystem(role_str.substr(0, colon_pos), &fs) < 0) {
ss << "Invalid filesystem";
return -ENOENT;
}
rank_pos = colon_pos+1;
}

// Now check that the role actually exists
if (get_filesystem(role->fscid) == nullptr) {
ss << "Filesystem with ID '" << role->fscid << "' not found";
return -ENOENT;
mds_rank_t rank;
std::string err;
std::string rank_str = role_str.substr(rank_pos);
long rank_i = strict_strtol(rank_str.c_str(), 10, &err);
if (rank_i < 0 || !err.empty()) {
ss << "Invalid rank '" << rank_str << "'";
return -EINVAL;
} else {
rank = rank_i;
}

auto fs = get_filesystem(role->fscid);
if (fs->mds_map.in.count(role->rank) == 0) {
ss << "Rank '" << role->rank << "' not found";
if (fs->mds_map.in.count(rank) == 0) {
ss << "Rank '" << rank << "' not found";
return -ENOENT;
}

*role = {fs->fscid, rank};

return 0;
}

32 changes: 11 additions & 21 deletions src/mds/FSMap.h
Expand Up @@ -375,20 +375,21 @@ class FSMap {
});
}

const std::map<fs_cluster_id_t, std::shared_ptr<Filesystem> > &get_filesystems() const
{
return filesystems;
}
bool any_filesystems() const {return !filesystems.empty(); }
bool filesystem_exists(fs_cluster_id_t fscid) const
{return filesystems.count(fscid) > 0;}

epoch_t get_epoch() const { return epoch; }
void inc_epoch() { epoch++; }

std::shared_ptr<const Filesystem> get_filesystem(fs_cluster_id_t fscid) const
size_t filesystem_count() const {return filesystems.size();}
bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;}
std::shared_ptr<const Filesystem> get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));}
std::shared_ptr<const Filesystem> get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);}
std::shared_ptr<const Filesystem> get_filesystem(const std::string &name) const
{
return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));
for (auto &i : filesystems) {
if (i.second->mds_map.fs_name == name) {
return std::const_pointer_cast<const Filesystem>(i.second);
}
}
return nullptr;
}

int parse_filesystem(
Expand Down Expand Up @@ -424,17 +425,6 @@ class FSMap {
void get_health(list<pair<health_status_t,std::string> >& summary,
list<pair<health_status_t,std::string> > *detail) const;

std::shared_ptr<const Filesystem> get_filesystem(const std::string &name) const
{
for (auto &i : filesystems) {
if (i.second->mds_map.fs_name == name) {
return i.second;
}
}

return nullptr;
}

/**
* Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
* all self-consistent.
Expand Down
5 changes: 2 additions & 3 deletions src/mon/MDSMonitor.cc
Expand Up @@ -1555,7 +1555,7 @@ int MDSMonitor::management_command(
}
}

if (pending_fsmap.any_filesystems()
if (pending_fsmap.filesystem_count() > 0
&& !pending_fsmap.get_enable_multiple()) {
ss << "Creation of multiple filesystems is disabled. To enable "
"this experimental feature, use 'ceph fs flag set enable_multiple "
Expand Down Expand Up @@ -2819,8 +2819,7 @@ bool MDSMonitor::maybe_promote_standby(std::shared_ptr<Filesystem> fs)
// that doesn't correspond to an existing filesystem, especially
// if we loaded from a version with a bug (#17466)
if (info.standby_for_fscid != FS_CLUSTER_ID_NONE
&& pending_fsmap.get_filesystems().count(
info.standby_for_fscid) == 0) {
&& !pending_fsmap.filesystem_exists(info.standby_for_fscid)) {
derr << "gid " << gid << " has invalid standby_for_fscid "
<< info.standby_for_fscid << dendl;
continue;
Expand Down
2 changes: 1 addition & 1 deletion src/mon/Monitor.cc
Expand Up @@ -2478,7 +2478,7 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f)
ss << " monmap " << *monmap << "\n";
ss << " election epoch " << get_epoch()
<< ", quorum " << get_quorum() << " " << get_quorum_names() << "\n";
if (mdsmon()->fsmap.any_filesystems()) {
if (mdsmon()->fsmap.filesystem_count() > 0) {
ss << " fsmap " << mdsmon()->fsmap << "\n";
}

Expand Down
4 changes: 2 additions & 2 deletions src/tools/cephfs/DataScan.cc
Expand Up @@ -212,8 +212,8 @@ int DataScan::main(const std::vector<const char*> &args)
// If caller didn't specify a namespace, try to pick
// one if only one exists
if (fscid == FS_CLUSTER_ID_NONE) {
if (fsmap->get_filesystems().size() == 1) {
fscid = fsmap->get_filesystems().begin()->first;
if (fsmap->filesystem_count() == 1) {
fscid = fsmap->get_filesystem()->fscid;
} else {
std::cerr << "Specify a filesystem with --filesystem" << std::endl;
return -EINVAL;
Expand Down
4 changes: 2 additions & 2 deletions src/tools/cephfs/RoleSelector.cc
Expand Up @@ -35,8 +35,8 @@ int MDSRoleSelector::parse(const FSMap &fsmap, std::string const &str)
if (colon_pos == std::string::npos) {
// An unqualified rank. Only valid if there is only one
// namespace.
if (fsmap.get_filesystems().size() == 1) {
fscid = fsmap.get_filesystems().begin()->first;
if (fsmap.filesystem_count() == 1) {
fscid = fsmap.get_filesystem()->fscid;
return parse_rank(fsmap, str);
} else {
return -EINVAL;
Expand Down

0 comments on commit 1bc2a7e

Please sign in to comment.