Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mon/PGMonitor: batch filter pg states; add sanity check #9394

Merged
merged 5 commits into from Jul 4, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/dev/osd_internals/recovery_reservation.rst
Expand Up @@ -62,7 +62,7 @@ to the monitor. The state chart can set:

- recovery_wait: waiting for local/remote reservations
- recovering: recovering
- wait_backfill: waiting for remote backfill reservations
- backfill_wait: waiting for remote backfill reservations
- backfilling: backfilling
- backfill_toofull: backfill reservation rejected, OSD too full

Expand Down
11 changes: 2 additions & 9 deletions src/mon/PGMap.cc
Expand Up @@ -1754,24 +1754,17 @@ void PGMap::generate_test_instances(list<PGMap*>& o)
}
}

void PGMap::get_filtered_pg_stats(const string& state, int64_t poolid, int64_t osdid,
void PGMap::get_filtered_pg_stats(uint32_t state, int64_t poolid, int64_t osdid,
bool primary, set<pg_t>& pgs)
{
int type = 0;
if (state != "all") {
type = pg_string_state(state);
if (type == -1)
assert(0 == "invalid type");
}

for (ceph::unordered_map<pg_t, pg_stat_t>::const_iterator i = pg_stat.begin();
i != pg_stat.end();
++i) {
if ((poolid >= 0) && (uint64_t(poolid) != i->first.pool()))
continue;
if ((osdid >= 0) && !(i->second.is_acting_osd(osdid,primary)))
continue;
if ((state != "all") && !(i->second.state & type))
if (!(i->second.state & state))
continue;
pgs.insert(i->first);
}
Expand Down
2 changes: 1 addition & 1 deletion src/mon/PGMap.h
Expand Up @@ -300,7 +300,7 @@ class PGMap {
void dump_osd_blocked_by_stats(Formatter *f) const;
void print_osd_blocked_by_stats(std::ostream *ss) const;

void get_filtered_pg_stats(const string& state, int64_t poolid, int64_t osdid,
void get_filtered_pg_stats(uint32_t state, int64_t poolid, int64_t osdid,
bool primary, set<pg_t>& pgs);
void recovery_summary(Formatter *f, list<string> *psl,
const pool_stat_t& delta_sum) const;
Expand Down
77 changes: 38 additions & 39 deletions src/mon/PGMonitor.cc
Expand Up @@ -621,17 +621,20 @@ void PGMonitor::handle_statfs(MonOpRequestRef op)
<< session->caps << dendl;
return;
}
MStatfsReply *reply;

dout(10) << "handle_statfs " << *statfs << " from " << statfs->get_orig_source() << dendl;

if (statfs->fsid != mon->monmap->fsid) {
dout(0) << "handle_statfs on fsid " << statfs->fsid << " != " << mon->monmap->fsid << dendl;
dout(0) << "handle_statfs on fsid " << statfs->fsid
<< " != " << mon->monmap->fsid << dendl;
return;
}


dout(10) << "handle_statfs " << *statfs
<< " from " << statfs->get_orig_source() << dendl;

// fill out stfs
reply = new MStatfsReply(mon->monmap->fsid, statfs->get_tid(), get_last_committed());
MStatfsReply *reply = new MStatfsReply(mon->monmap->fsid, statfs->get_tid(),
get_last_committed());

// these are in KB.
reply->h.st.kb = pg_map.osd_sum.kb;
Expand Down Expand Up @@ -932,23 +935,15 @@ void PGMonitor::check_osd_map(epoch_t epoch)
}
}

bool propose = false;
if (pg_map.last_osdmap_epoch < epoch) {
pending_inc.osdmap_epoch = epoch;
propose = true;
}

if (map_pg_creates())
propose = true;
if (register_new_pgs())
propose = true;
assert(pg_map.last_osdmap_epoch < epoch);
pending_inc.osdmap_epoch = epoch;
map_pg_creates();
register_new_pgs();

if ((need_check_down_pgs || !need_check_down_pg_osds.empty()) &&
check_down_pgs())
propose = true;
if (need_check_down_pgs || !need_check_down_pg_osds.empty())
check_down_pgs();

if (propose)
propose_pending();
propose_pending();
}

void PGMonitor::register_pg(OSDMap *osdmap,
Expand Down Expand Up @@ -1034,7 +1029,7 @@ void PGMonitor::register_pg(OSDMap *osdmap,
}
}

bool PGMonitor::register_new_pgs()
void PGMonitor::register_new_pgs()
{
// iterate over crush mapspace
OSDMap *osdmap = &mon->osdmon()->osdmap;
Expand Down Expand Up @@ -1116,10 +1111,9 @@ bool PGMonitor::register_new_pgs()

dout(10) << "register_new_pgs registered " << created << " new pgs, removed "
<< removed << " uncreated pgs" << dendl;
return (created || removed);
}

bool PGMonitor::map_pg_creates()
void PGMonitor::map_pg_creates()
{
OSDMap *osdmap = &mon->osdmon()->osdmap;

Expand Down Expand Up @@ -1187,9 +1181,7 @@ bool PGMonitor::map_pg_creates()
}
if (changed) {
dout(10) << __func__ << " " << changed << " pgs changed primary" << dendl;
return true;
}
return false;
}

void PGMonitor::send_pg_creates()
Expand Down Expand Up @@ -1300,12 +1292,12 @@ void PGMonitor::_try_mark_pg_stale(
}
}

bool PGMonitor::check_down_pgs()
void PGMonitor::check_down_pgs()
{
dout(10) << "check_down_pgs last_osdmap_epoch "
<< pg_map.last_osdmap_epoch << dendl;
if (pg_map.last_osdmap_epoch == 0)
return false;
return;

// use the OSDMap that matches the one pg_map has consumed.
std::unique_ptr<OSDMap> osdmap;
Expand All @@ -1315,8 +1307,6 @@ bool PGMonitor::check_down_pgs()
osdmap.reset(new OSDMap);
osdmap->decode(bl);

bool ret = false;

// if a large number of osds changed state, just iterate over the whole
// pg map.
if (need_check_down_pg_osds.size() > (unsigned)osdmap->get_num_osds() *
Expand All @@ -1329,7 +1319,6 @@ bool PGMonitor::check_down_pgs()
p.second.acting_primary != -1 &&
osdmap->is_down(p.second.acting_primary)) {
_try_mark_pg_stale(osdmap.get(), p.first, p.second);
ret = true;
}
}
} else {
Expand All @@ -1340,16 +1329,13 @@ bool PGMonitor::check_down_pgs()
assert(stat.acting_primary == osd);
if ((stat.state & PG_STATE_STALE) == 0) {
_try_mark_pg_stale(osdmap.get(), pgid, stat);
ret = true;
}
}
}
}
}
need_check_down_pgs = false;
need_check_down_pg_osds.clear();

return ret;
}

inline string percentify(const float& a) {
Expand Down Expand Up @@ -1756,7 +1742,6 @@ bool PGMonitor::preprocess_command(MonOpRequestRef op)
int64_t pool = -1;
vector<string>states;
set<pg_t> pgs;
set<string> what;
cmd_getval(g_ceph_context, cmdmap, "pool", pool);
cmd_getval(g_ceph_context, cmdmap, "osd", osd);
cmd_getval(g_ceph_context, cmdmap, "states", states);
Expand All @@ -1772,17 +1757,31 @@ bool PGMonitor::preprocess_command(MonOpRequestRef op)
}
if (states.empty())
states.push_back("all");

uint32_t state = 0;

while (!states.empty()) {
string state = states.back();
what.insert(state);
pg_map.get_filtered_pg_stats(state,pool,osd,primary,pgs);
string state_str = states.back();

if (state_str == "all") {
state = -1;
break;
} else {
int filter = pg_string_state(state_str);
assert(filter != -1);
state |= filter;
}

states.pop_back();
}

pg_map.get_filtered_pg_stats(state, pool, osd, primary, pgs);

if (f && !pgs.empty()) {
pg_map.dump_filtered_pg_stats(f.get(),pgs);
pg_map.dump_filtered_pg_stats(f.get(), pgs);
f->flush(ds);
} else if (!pgs.empty()) {
pg_map.dump_filtered_pg_stats(ds,pgs);
pg_map.dump_filtered_pg_stats(ds, pgs);
}
r = 0;
} else if (prefix == "pg dump_stuck") {
Expand Down
11 changes: 3 additions & 8 deletions src/mon/PGMonitor.h
Expand Up @@ -115,17 +115,13 @@ class PGMonitor : public PaxosService {

/**
* check latest osdmap for new pgs to register
*
* @return true if we updated pending_inc (and should propose)
*/
bool register_new_pgs();
void register_new_pgs();

/**
* recalculate creating pg mappings
*
* @return true if we updated pending_inc
*/
bool map_pg_creates();
void map_pg_creates();

void send_pg_creates();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you update the comment accordingly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, I don't understand what you mean. It does not return anything now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, sorry, I thought you missed this.

epoch_t send_pg_creates(int osd, Connection *con, epoch_t next);
Expand All @@ -136,9 +132,8 @@ class PGMonitor : public PaxosService {
* clears need_check_down_pgs
* clears need_check_down_pg_osds
*
* @return true if we updated pending_inc (and should propose)
*/
bool check_down_pgs();
void check_down_pgs();
void _try_mark_pg_stale(const OSDMap *osdmap, pg_t pgid,
const pg_stat_t& cur_stat);

Expand Down
2 changes: 1 addition & 1 deletion src/osd/osd_types.cc
Expand Up @@ -824,7 +824,7 @@ std::string pg_state_string(int state)
oss << "repair+";
if ((state & PG_STATE_BACKFILL_WAIT) &&
!(state &PG_STATE_BACKFILL))
oss << "wait_backfill+";
oss << "backfill_wait+";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you update doc/dev/osd_internals/recovery_reservation.rst accordingly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Thank you for your reminder.

if (state & PG_STATE_BACKFILL)
oss << "backfilling+";
if (state & PG_STATE_BACKFILL_TOOFULL)
Expand Down