Skip to content

Commit

Permalink
Merge pull request #13594 from athanatos/wip-snap-trim-sleep
Browse files Browse the repository at this point in the history
osd: add snap trim reservation and re-implement osd_snap_trim_sleep

Reviewed-by: Josh Durgin <jdurgin@redhat.com>
  • Loading branch information
athanatos committed Feb 24, 2017
2 parents 4f856fe + 2ed7759 commit 44b26f6
Show file tree
Hide file tree
Showing 11 changed files with 267 additions and 77 deletions.
1 change: 1 addition & 0 deletions qa/suites/rados/thrash/thrashers/default.yaml
Expand Up @@ -10,6 +10,7 @@ tasks:
osd scrub min interval: 60
osd scrub max interval: 120
osd max backfills: 3
osd snap trim sleep: 2
- thrashosds:
timeout: 1200
chance_pgnum_grow: 1
Expand Down
1 change: 1 addition & 0 deletions qa/suites/rados/thrash/thrashers/pggrow.yaml
Expand Up @@ -10,6 +10,7 @@ tasks:
osd scrub max interval: 120
filestore odsync write: true
osd max backfills: 2
osd snap trim sleep: .5
- thrashosds:
timeout: 1200
chance_pgnum_grow: 2
Expand Down
27 changes: 27 additions & 0 deletions qa/tasks/ceph_manager.py
Expand Up @@ -1650,6 +1650,33 @@ def do_pg_scrub(self, pool, pgnum, stype):
time.sleep(SLEEP_TIME)
timer += SLEEP_TIME

def wait_snap_trimming_complete(self, pool):
    """
    Block until no pg of the given pool is in a 'snaptrim' state.

    :param pool: pool name whose pgs are polled for snap-trim activity
    :raises AssertionError: if trimming has not finished within the
        fatal timeout (600 s)
    """
    POLL_PERIOD = 10
    FATAL_TIMEOUT = 600
    start = time.time()
    poolnum = self.get_pool_num(pool)
    # pgids look like "<poolnum>.<pgnum>"; match on the prefix only.
    # A substring test would wrongly match pool 1 against pool 11's pgs.
    poolnumstr = "%s." % (poolnum,)
    while True:
        # Fail loudly rather than looping forever if trimming is stuck.
        assert (time.time() - start) < FATAL_TIMEOUT, \
            'failed to complete snap trimming before timeout'
        all_stats = self.get_pg_stats()
        trimming = False
        for pg in all_stats:
            if pg['pgid'].startswith(poolnumstr) and \
               ('snaptrim' in pg['state']):
                self.log("pg {pg} in trimming, state: {state}".format(
                    pg=pg['pgid'],
                    state=pg['state']))
                trimming = True
        if not trimming:
            break
        self.log("{pool} still trimming, waiting".format(pool=pool))
        time.sleep(POLL_PERIOD)

def get_single_pg_stats(self, pgid):
"""
Return pg for the pgid specified.
Expand Down
1 change: 1 addition & 0 deletions qa/tasks/rados.py
Expand Up @@ -250,6 +250,7 @@ def thread():
run.wait(tests.itervalues())

for pool in created_pools:
manager.wait_snap_trimming_complete(pool);
manager.remove_pool(pool)

running = gevent.spawn(thread)
Expand Down
4 changes: 3 additions & 1 deletion src/common/config_opts.h
Expand Up @@ -761,7 +761,7 @@ OPTION(osd_op_thread_suicide_timeout, OPT_INT, 150)
OPTION(osd_recovery_thread_timeout, OPT_INT, 30)
OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300)
OPTION(osd_recovery_sleep, OPT_FLOAT, 0) // seconds to sleep between recovery ops
OPTION(osd_snap_trim_sleep, OPT_FLOAT, 0)
OPTION(osd_snap_trim_sleep, OPT_DOUBLE, 0)
OPTION(osd_scrub_invalid_stats, OPT_BOOL, true)
OPTION(osd_remove_thread_timeout, OPT_INT, 60*60)
OPTION(osd_remove_thread_suicide_timeout, OPT_INT, 10*60*60)
Expand All @@ -778,6 +778,8 @@ OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // prio the heartbea

// max number of parallel snap trims/pg
OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2)
// max number of trimming pgs
OPTION(osd_max_trimming_pgs, OPT_U64, 2)

// minimum number of peers that must be reachable to mark ourselves
// back up after being wrongly marked down.
Expand Down
15 changes: 15 additions & 0 deletions src/osd/OSD.cc
Expand Up @@ -261,6 +261,11 @@ OSDService::OSDService(OSD *osd) :
remote_reserver(&reserver_finisher, cct->_conf->osd_max_backfills,
cct->_conf->osd_min_recovery_priority),
pg_temp_lock("OSDService::pg_temp_lock"),
snap_sleep_lock("OSDService::snap_sleep_lock"),
snap_sleep_timer(
osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */),
snap_reserver(&reserver_finisher,
cct->_conf->osd_max_trimming_pgs),
recovery_lock("OSDService::recovery_lock"),
recovery_ops_active(0),
recovery_ops_reserved(0),
Expand Down Expand Up @@ -492,6 +497,12 @@ void OSDService::shutdown()
Mutex::Locker l(backfill_request_lock);
backfill_request_timer.shutdown();
}

{
Mutex::Locker l(snap_sleep_lock);
snap_sleep_timer.shutdown();
}

osdmap = OSDMapRef();
next_osdmap = OSDMapRef();
}
Expand All @@ -503,6 +514,7 @@ void OSDService::init()
objecter->set_client_incarnation(0);
watch_timer.init();
agent_timer.init();
snap_sleep_timer.init();

agent_thread.create("osd_srv_agent");

Expand Down Expand Up @@ -9272,6 +9284,9 @@ void OSD::handle_conf_change(const struct md_config_t *conf,
service.local_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority);
service.remote_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority);
}
if (changed.count("osd_max_trimming_pgs")) {
service.snap_reserver.set_max(cct->_conf->osd_max_trimming_pgs);
}
if (changed.count("osd_op_complaint_time") ||
changed.count("osd_op_log_threshold")) {
op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time,
Expand Down
6 changes: 6 additions & 0 deletions src/osd/OSD.h
Expand Up @@ -910,7 +910,13 @@ class OSDService {
void send_pg_temp();

void queue_for_peering(PG *pg);

Mutex snap_sleep_lock;
SafeTimer snap_sleep_timer;

AsyncReserver<spg_t> snap_reserver;
void queue_for_snap_trim(PG *pg);

void queue_for_scrub(PG *pg) {
op_wq.queue(
make_pair(
Expand Down
65 changes: 31 additions & 34 deletions src/osd/PrimaryLogPG.cc
Expand Up @@ -3654,14 +3654,6 @@ void PrimaryLogPG::snap_trimmer(epoch_t queued)
if (deleting || pg_has_reset_since(queued)) {
return;
}
if (cct->_conf->osd_snap_trim_sleep > 0) {
unlock();
utime_t t;
t.set_from_double(cct->_conf->osd_snap_trim_sleep);
t.sleep();
lock();
dout(20) << __func__ << " slept for " << t << dendl;
}

assert(is_primary());

Expand Down Expand Up @@ -12969,11 +12961,6 @@ void PrimaryLogPG::_scrub_finish()
#undef dout_prefix
#define dout_prefix *_dout << pg->gen_prefix()

PrimaryLogPG::SnapTrimmer::~SnapTrimmer()
{
in_flight.clear();
}

void PrimaryLogPG::SnapTrimmer::log_enter(const char *state_name)
{
ldout(pg->cct, 20) << "enter " << state_name << dendl;
Expand Down Expand Up @@ -13007,52 +12994,63 @@ boost::statechart::result PrimaryLogPG::NotTrimming::react(const KickTrim&)
PrimaryLogPG *pg = context< SnapTrimmer >().pg;
ldout(pg->cct, 10) << "NotTrimming react KickTrim" << dendl;

assert(pg->is_primary() && pg->is_active());
if (!(pg->is_primary() && pg->is_active())) {
ldout(pg->cct, 10) << "NotTrimming not primary or active" << dendl;
return discard_event();
}
if (!pg->is_clean() ||
pg->snap_trimq.empty()) {
ldout(pg->cct, 10) << "NotTrimming not clean or nothing to trim" << dendl;
return discard_event();
}

if (pg->scrubber.active) {
ldout(pg->cct, 10) << " scrubbing, will requeue snap_trimmer after" << dendl;
pg->scrubber.queue_snap_trim = true;
return transit< WaitScrub >();
} else {
context<SnapTrimmer>().snap_to_trim = pg->snap_trimq.range_start();
ldout(pg->cct, 10) << "NotTrimming: trimming "
<< pg->snap_trimq.range_start()
<< dendl;
return transit< AwaitAsyncWork >();
return transit< Trimming >();
}
}

// Reaction fired once the AsyncReserver grants this pg a snap-trim slot.
// Re-validates that trimming is still possible (the pg state may have
// changed while the reservation was queued) before moving on to the
// actual trimming work in AwaitAsyncWork.
boost::statechart::result PrimaryLogPG::WaitReservation::react(const SnapTrimReserved&)
{
  PrimaryLogPG *pg = context< SnapTrimmer >().pg;
  ldout(pg->cct, 10) << "WaitReservation react SnapTrimReserved" << dendl;

  // The reservation has been granted, so the pending callback is no
  // longer outstanding; clear it before any state transition.
  pending = nullptr;
  if (!context< SnapTrimmer >().can_trim()) {
    // Conditions changed while waiting (e.g. no longer primary/clean);
    // requeue a KickTrim so the machine re-evaluates from NotTrimming.
    post_event(KickTrim());
    return transit< NotTrimming >();
  }

  // Record which snap to trim next; range_start() is the lowest snapid
  // still queued for this pg.
  context<Trimming>().snap_to_trim = pg->snap_trimq.range_start();
  // NOTE(review): log prefix says "NotTrimming:" but we are in
  // WaitReservation — looks copied from NotTrimming::react; confirm.
  ldout(pg->cct, 10) << "NotTrimming: trimming "
		     << pg->snap_trimq.range_start()
		     << dendl;
  return transit< AwaitAsyncWork >();
}

/* AwaitAsyncWork */
PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx)
: my_base(ctx),
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/AwaitAsyncWork")
{
auto *pg = context< SnapTrimmer >().pg;
context< SnapTrimmer >().log_enter(state_name);
context< SnapTrimmer >().pg->osd->queue_for_snap_trim(
context< SnapTrimmer >().pg);
}

void PrimaryLogPG::AwaitAsyncWork::exit()
{
context< SnapTrimmer >().log_exit(state_name, enter_time);
context< SnapTrimmer >().pg->osd->queue_for_snap_trim(pg);
pg->state_set(PG_STATE_SNAPTRIM);
pg->publish_stats_to_osd();
}

boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&)
{
PrimaryLogPGRef pg = context< SnapTrimmer >().pg;
ldout(pg->cct, 10) << "AwaitAsyncWork react" << dendl;
snapid_t snap_to_trim = context<SnapTrimmer>().snap_to_trim;
auto &in_flight = context<SnapTrimmer>().in_flight;
snapid_t snap_to_trim = context<Trimming>().snap_to_trim;
auto &in_flight = context<Trimming>().in_flight;
assert(in_flight.empty());

assert(pg->is_primary() && pg->is_active());
if (!pg->is_clean() ||
pg->scrubber.active) {
if (!context< SnapTrimmer >().can_trim()) {
ldout(pg->cct, 10) << "something changed, reverting to NotTrimming" << dendl;
post_event(KickTrim());
return transit< NotTrimming >();
Expand Down Expand Up @@ -13109,8 +13107,7 @@ boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&)
return transit< WaitRWLock >();

} else {
ldout(pg->cct, 10) << "letting the ones we already started finish"
<< dendl;
ldout(pg->cct, 10) << "letting the ones we already started finish" << dendl;
return transit< WaitRepops >();
}
}
Expand Down

0 comments on commit 44b26f6

Please sign in to comment.