Skip to content

Commit

Permalink
Merge pull request #5953 from guangyy/wip-13121
Browse files Browse the repository at this point in the history
osd: support pool level recovery_priority and recovery_op_priority

Reviewed-by: David Zafman <dzafman@redhat.com>
  • Loading branch information
liewegas committed Dec 4, 2015
2 parents 673d852 + ba346bf commit c3e3ee0
Show file tree
Hide file tree
Showing 9 changed files with 68 additions and 21 deletions.
12 changes: 12 additions & 0 deletions qa/workunits/cephtool/test.sh
Expand Up @@ -1407,6 +1407,18 @@ function test_mon_osd_pool_set()
ceph osd pool set $TEST_POOL_GETSET deep_scrub_interval 0
expect_false "ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | grep '.'"

expect_false "ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep '.'"
ceph osd pool set $TEST_POOL_GETSET recovery_priority 5
ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: 5'
ceph osd pool set $TEST_POOL_GETSET recovery_priority 0
expect_false "ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep '.'"

expect_false "ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | grep '.'"
ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 5
ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | grep 'recovery_op_priority: 5'
ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 0
expect_false "ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | grep '.'"

ceph osd pool set $TEST_POOL_GETSET nopgchange 1
expect_false ceph osd pool set $TEST_POOL_GETSET pg_num 10
expect_false ceph osd pool set $TEST_POOL_GETSET pgp_num 10
Expand Down
4 changes: 2 additions & 2 deletions src/mon/MonCommands.h
Expand Up @@ -674,11 +674,11 @@ COMMAND("osd pool rename " \
"rename <srcpool> to <destpool>", "osd", "rw", "cli,rest")
// "osd pool get <pool> <var>": read a single pool parameter.
// The choices list is the one that includes the per-pool recovery options
// (recovery_priority, recovery_op_priority); the superseded list without them
// is dropped so the macro receives exactly one `var` descriptor.
COMMAND("osd pool get " \
	"name=pool,type=CephPoolname " \
	"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority", \
	"get pool parameter <var>", "osd", "r", "cli,rest")
// "osd pool set <pool> <var> <val> [--yes-i-really-mean-it]": set a single
// pool parameter. Only the choices list that includes recovery_priority and
// recovery_op_priority is kept; leaving the superseded descriptor in place
// would concatenate two `name=var` arguments into the signature string.
COMMAND("osd pool set " \
	"name=pool,type=CephPoolname " \
	"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority " \
	"name=val,type=CephString " \
	"name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
	"set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
Expand Down
11 changes: 9 additions & 2 deletions src/mon/OSDMonitor.cc
Expand Up @@ -2907,7 +2907,8 @@ namespace {
ERASURE_CODE_PROFILE, MIN_READ_RECENCY_FOR_PROMOTE,
MIN_WRITE_RECENCY_FOR_PROMOTE, FAST_READ,
HIT_SET_GRADE_DECAY_RATE, HIT_SET_SEARCH_LAST_N,
SCRUB_MIN_INTERVAL, SCRUB_MAX_INTERVAL, DEEP_SCRUB_INTERVAL};
SCRUB_MIN_INTERVAL, SCRUB_MAX_INTERVAL, DEEP_SCRUB_INTERVAL,
RECOVERY_PRIORITY, RECOVERY_OP_PRIORITY};

std::set<osd_pool_get_choices>
subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
Expand Down Expand Up @@ -3386,7 +3387,9 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
("hit_set_search_last_n", HIT_SET_SEARCH_LAST_N)
("scrub_min_interval", SCRUB_MIN_INTERVAL)
("scrub_max_interval", SCRUB_MAX_INTERVAL)
("deep_scrub_interval", DEEP_SCRUB_INTERVAL);
("deep_scrub_interval", DEEP_SCRUB_INTERVAL)
("recovery_priority", RECOVERY_PRIORITY)
("recovery_op_priority", RECOVERY_OP_PRIORITY);

typedef std::set<osd_pool_get_choices> choices_set_t;

Expand Down Expand Up @@ -3568,6 +3571,8 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
case SCRUB_MIN_INTERVAL:
case SCRUB_MAX_INTERVAL:
case DEEP_SCRUB_INTERVAL:
case RECOVERY_PRIORITY:
case RECOVERY_OP_PRIORITY:
for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) {
if (i->second == *it)
break;
Expand Down Expand Up @@ -3699,6 +3704,8 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
case SCRUB_MIN_INTERVAL:
case SCRUB_MAX_INTERVAL:
case DEEP_SCRUB_INTERVAL:
case RECOVERY_PRIORITY:
case RECOVERY_OP_PRIORITY:
for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) {
if (i->second == *it)
break;
Expand Down
25 changes: 16 additions & 9 deletions src/osd/PG.cc
Expand Up @@ -2079,26 +2079,33 @@ void PG::mark_clean()
/**
 * Priority this PG uses when requesting recovery.
 *
 * The result is OSD_RECOVERY_PRIORITY_BASE plus the pool's
 * RECOVERY_PRIORITY option, clamped to [0, OSD_RECOVERY_PRIORITY_MAX].
 *
 * @return the clamped priority; a higher value means a higher priority
 */
unsigned PG::get_recovery_priority()
{
  // a higher value -> a higher priority

  // Starts at 0 so the base priority is used when the lookup does not
  // write a value (presumably when the pool option is unset -- confirm
  // against pool_opts_t::get).
  int pool_recovery_priority = 0;
  pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);

  // Sum in a wide signed type. With unsigned arithmetic a pool value below
  // -OSD_RECOVERY_PRIORITY_BASE wraps to a huge number and would then be
  // clamped to the *maximum* priority instead of the minimum.
  long long ret =
    static_cast<long long>(OSD_RECOVERY_PRIORITY_BASE) + pool_recovery_priority;
  if (ret < 0)
    ret = 0;
  if (ret > static_cast<long long>(OSD_RECOVERY_PRIORITY_MAX))
    ret = static_cast<long long>(OSD_RECOVERY_PRIORITY_MAX);
  return static_cast<unsigned>(ret);
}

/**
 * Priority this PG uses when requesting backfill.
 *
 *  - undersized: OSD_BACKFILL_DEGRADED_PRIORITY_BASE + number of missing
 *    replicas (pool.info.size - actingset.size())
 *  - degraded:   OSD_BACKFILL_DEGRADED_PRIORITY_BASE
 *  - otherwise:  OSD_BACKFILL_PRIORITY_BASE
 *
 * @return the priority; a higher value means a higher priority
 */
unsigned PG::get_backfill_priority()
{
  // a higher value -> a higher priority

  unsigned ret = OSD_BACKFILL_PRIORITY_BASE;
  if (is_undersized()) {
    // undersized: OSD_BACKFILL_DEGRADED_PRIORITY_BASE + num missing replicas
    // The subtraction is unsigned, so it is only evaluated behind this
    // assert, inside the undersized branch.
    assert(pool.info.size > actingset.size());
    ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE + (pool.info.size - actingset.size());
  } else if (is_degraded()) {
    // degraded: baseline degraded
    ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE;
  }
  // Backfill priorities must stay strictly below the recovery maximum.
  assert (ret < OSD_RECOVERY_PRIORITY_MAX);

  return ret;
}

void PG::finish_recovery(list<Context*>& tfin)
Expand Down
10 changes: 5 additions & 5 deletions src/osd/ReplicatedPG.cc
Expand Up @@ -3127,7 +3127,7 @@ void ReplicatedPG::do_backfill(OpRequestRef op)
get_osdmap()->get_epoch(),
m->query_epoch,
spg_t(info.pgid.pgid, primary.shard));
reply->set_priority(cct->_conf->osd_recovery_op_priority);
reply->set_priority(get_recovery_op_priority());
osd->send_message_osd_cluster(reply, m->get_connection());
queue_peering_event(
CephPeeringEvtRef(
Expand Down Expand Up @@ -10199,7 +10199,7 @@ int ReplicatedPG::recover_primary(int max, ThreadPool::TPHandle &handle)
++skipped;
} else {
int r = recover_missing(
soid, need, cct->_conf->osd_recovery_op_priority, h);
soid, need, get_recovery_op_priority(), h);
switch (r) {
case PULL_YES:
++started;
Expand All @@ -10222,7 +10222,7 @@ int ReplicatedPG::recover_primary(int max, ThreadPool::TPHandle &handle)
pg_log.set_last_requested(v);
}

pgbackend->run_recovery_op(h, cct->_conf->osd_recovery_op_priority);
pgbackend->run_recovery_op(h, get_recovery_op_priority());
return started;
}

Expand Down Expand Up @@ -10364,7 +10364,7 @@ int ReplicatedPG::recover_replicas(int max, ThreadPool::TPHandle &handle)
}
}

pgbackend->run_recovery_op(h, cct->_conf->osd_recovery_op_priority);
pgbackend->run_recovery_op(h, get_recovery_op_priority());
return started;
}

Expand Down Expand Up @@ -10709,7 +10709,7 @@ int ReplicatedPG::recover_backfill(
prep_backfill_object_push(to_push[i].get<0>(), to_push[i].get<1>(),
to_push[i].get<2>(), to_push[i].get<3>(), h);
}
pgbackend->run_recovery_op(h, cct->_conf->osd_recovery_op_priority);
pgbackend->run_recovery_op(h, get_recovery_op_priority());

dout(5) << "backfill_pos is " << backfill_pos << dendl;
for (set<hobject_t, hobject_t::Comparator>::iterator i = backfills_in_flight.begin();
Expand Down
5 changes: 5 additions & 0 deletions src/osd/ReplicatedPG.h
Expand Up @@ -1491,6 +1491,11 @@ class ReplicatedPG : public PG, public PGBackend::Listener {
hobject_t generate_temp_object(); ///< generate a new temp object name
/// generate a new temp object name (for recovery)
hobject_t get_temp_recovery_object(eversion_t version, snapid_t snap);
/// priority for recovery ops: the pool's RECOVERY_OP_PRIORITY option when it
/// is positive, otherwise the osd_recovery_op_priority config value
int get_recovery_op_priority() const {
  int pool_pri = 0;
  pool.info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pool_pri);
  if (pool_pri > 0) {
    return pool_pri;
  }
  // zero or negative: fall back to the config value
  return cct->_conf->osd_recovery_op_priority;
}
void log_missing(unsigned missing,
const boost::optional<hobject_t> &head,
LogChannelRef clog,
Expand Down
6 changes: 5 additions & 1 deletion src/osd/osd_types.cc
Expand Up @@ -900,7 +900,11 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of
("scrub_max_interval", pool_opts_t::opt_desc_t(
pool_opts_t::SCRUB_MAX_INTERVAL, pool_opts_t::DOUBLE))
("deep_scrub_interval", pool_opts_t::opt_desc_t(
pool_opts_t::DEEP_SCRUB_INTERVAL, pool_opts_t::DOUBLE));
pool_opts_t::DEEP_SCRUB_INTERVAL, pool_opts_t::DOUBLE))
("recovery_priority", pool_opts_t::opt_desc_t(
pool_opts_t::RECOVERY_PRIORITY, pool_opts_t::INT))
("recovery_op_priority", pool_opts_t::opt_desc_t(
pool_opts_t::RECOVERY_OP_PRIORITY, pool_opts_t::INT));

bool pool_opts_t::is_opt_name(const std::string& name) {
return opt_mapping.find(name) != opt_mapping.end();
Expand Down
10 changes: 10 additions & 0 deletions src/osd/osd_types.h
Expand Up @@ -63,6 +63,14 @@
/// max recovery priority for MBackfillReserve
/// (PG::get_recovery_priority() caps its result at this value, and
/// PG::get_backfill_priority() asserts its result stays below it)
#define OSD_RECOVERY_PRIORITY_MAX 255u

/// base recovery priority for MBackfillReserve
/// (PG::get_recovery_priority() adds the pool's recovery_priority option to it)
#define OSD_RECOVERY_PRIORITY_BASE 230u

/// base backfill priority for MBackfillReserve (degraded PG);
/// an undersized PG adds its number of missing replicas on top of this
#define OSD_BACKFILL_DEGRADED_PRIORITY_BASE 200u

/// base backfill priority for MBackfillReserve
/// (used when the PG is neither undersized nor degraded)
#define OSD_BACKFILL_PRIORITY_BASE 1u

typedef hobject_t collection_list_handle_t;

Expand Down Expand Up @@ -902,6 +910,8 @@ class pool_opts_t {
SCRUB_MIN_INTERVAL,
SCRUB_MAX_INTERVAL,
DEEP_SCRUB_INTERVAL,
RECOVERY_PRIORITY,
RECOVERY_OP_PRIORITY
};

enum type_t {
Expand Down
6 changes: 4 additions & 2 deletions src/test/pybind/test_ceph_argparse.py
Expand Up @@ -1034,7 +1034,8 @@ def test_pool_get(self):
for var in ('size', 'min_size', 'crash_replay_interval',
'pg_num', 'pgp_num', 'crush_ruleset', 'auid', 'fast_read',
'scrub_min_interval', 'scrub_max_interval',
'deep_scrub_interval'):
'deep_scrub_interval', 'recovery_priority',
'recovery_op_priority'):
self.assert_valid_command(['osd', 'pool', 'get', 'poolname', var])
assert_equal({}, validate_command(sigdict, ['osd', 'pool']))
assert_equal({}, validate_command(sigdict, ['osd', 'pool',
Expand All @@ -1053,7 +1054,8 @@ def test_pool_set(self):
'pg_num', 'pgp_num', 'crush_ruleset',
'hashpspool', 'auid', 'fast_read',
'scrub_min_interval', 'scrub_max_interval',
'deep_scrub_interval'):
'deep_scrub_interval', 'recovery_priority',
'recovery_op_priority'):
self.assert_valid_command(['osd', 'pool',
'set', 'poolname', var, 'value'])
assert_equal({}, validate_command(sigdict, ['osd', 'pool',
Expand Down

0 comments on commit c3e3ee0

Please sign in to comment.