Skip to content

Commit

Permalink
Merge pull request #10757: jewel: mon/osdmonitor: decouple adjust_heartbeat_grace and min_down_reporters
Browse files (browse the repository at this point in the history)

Reviewed-by: Loic Dachary <ldachary@redhat.com>
  • Loading branch information
Loic Dachary committed Aug 24, 2016
2 parents fac895b + 0b30a1d commit a52d7f0
Showing 1 changed file with 25 additions and 20 deletions.
45 changes: 25 additions & 20 deletions src/mon/OSDMonitor.cc
Expand Up @@ -1733,9 +1733,10 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi)

utime_t grace = orig_grace;
double my_grace = 0, peer_grace = 0;
double decay_k = 0;
if (g_conf->mon_osd_adjust_heartbeat_grace) {
double halflife = (double)g_conf->mon_osd_laggy_halflife;
double decay_k = ::log(.5) / halflife;
decay_k = ::log(.5) / halflife;

// scale grace period based on historical probability of 'lagginess'
// (false positive failures due to slowness).
Expand All @@ -1745,31 +1746,35 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi)
<< " failed_for " << failed_for << " decay " << decay << dendl;
my_grace = decay * (double)xi.laggy_interval * xi.laggy_probability;
grace += my_grace;
}

// consider the peers reporting a failure a proxy for a potential
// 'subcluster' over the overall cluster that is similarly
// laggy. this is clearly not true in all cases, but will sometimes
// help us localize the grace correction to a subset of the system
// (say, a rack with a bad switch) that is unhappy.
assert(fi.reporters.size());
for (map<int,failure_reporter_t>::iterator p = fi.reporters.begin();
p != fi.reporters.end();
++p) {
// get the parent bucket whose type matches with "reporter_subtree_level".
// fall back to OSD if the level doesn't exist.
map<string, string> reporter_loc = osdmap.crush->get_full_location(p->first);
map<string, string>::iterator iter = reporter_loc.find(reporter_subtree_level);
if (iter == reporter_loc.end()) {
reporters_by_subtree.insert("osd." + to_string(p->first));
} else {
reporters_by_subtree.insert(iter->second);
}

// consider the peers reporting a failure a proxy for a potential
// 'subcluster' over the overall cluster that is similarly
// laggy. this is clearly not true in all cases, but will sometimes
// help us localize the grace correction to a subset of the system
// (say, a rack with a bad switch) that is unhappy.
assert(fi.reporters.size());
for (map<int,failure_reporter_t>::iterator p = fi.reporters.begin();
p != fi.reporters.end();
++p) {
// get the parent bucket whose type matches with "reporter_subtree_level".
// fall back to OSD if the level doesn't exist.
map<string, string> reporter_loc = osdmap.crush->get_full_location(p->first);
map<string, string>::iterator iter = reporter_loc.find(reporter_subtree_level);
if (iter == reporter_loc.end()) {
reporters_by_subtree.insert("osd." + to_string(p->first));
} else {
reporters_by_subtree.insert(iter->second);
}
if (g_conf->mon_osd_adjust_heartbeat_grace) {
const osd_xinfo_t& xi = osdmap.get_xinfo(p->first);
utime_t elapsed = now - xi.down_stamp;
double decay = exp((double)elapsed * decay_k);
peer_grace += decay * (double)xi.laggy_interval * xi.laggy_probability;
}
}

if (g_conf->mon_osd_adjust_heartbeat_grace) {
peer_grace /= (double)fi.reporters.size();
grace += peer_grace;
}
Expand Down

0 comments on commit a52d7f0

Please sign in to comment.