Skip to content

Commit

Permalink
osd/: Use MOSDPGUpdateLogMissing to implement mark_unfound_lost_delete safely
Browse files Browse the repository at this point in the history

Using a MOSDPGLog was unsafe since it is not ordered with
respect to repops.  Instead, use a new message sent through
the same paths as repops.

Signed-off-by: Samuel Just <sjust@redhat.com>
  • Loading branch information
athanatos committed Feb 25, 2016
1 parent 88ee4e7 commit e7edf20
Show file tree
Hide file tree
Showing 11 changed files with 564 additions and 122 deletions.
8 changes: 6 additions & 2 deletions src/osd/OSD.cc
Expand Up @@ -5292,7 +5292,12 @@ void OSD::do_command(Connection *con, ceph_tid_t tid, vector<string>& cmd, buffe
// simulate pg <pgid> cmd= for pg->do-command
if (prefix != "pg")
cmd_putval(cct, cmdmap, "cmd", prefix);
r = pg->do_command(cmdmap, ss, data, odata);
r = pg->do_command(cmdmap, ss, data, odata, con, tid);
if (r == -EAGAIN) {
pg->unlock();
// don't reply, pg will do so async
return;
}
} else {
ss << "not primary for pgid " << pgid;

Expand Down Expand Up @@ -5592,7 +5597,6 @@ void OSD::do_command(Connection *con, ceph_tid_t tid, vector<string>& cmd, buffe
reply->set_data(odata);
con->send_message(reply);
}
return;
}


Expand Down
87 changes: 59 additions & 28 deletions src/osd/PG.cc
Expand Up @@ -4556,39 +4556,71 @@ void PG::share_pg_info()
}
}

/*
* Share a new segment of this PG's log with some replicas, after PG is active.
*
* Updates peer_missing and peer_info.
*/
void PG::share_pg_log()
/**
 * Append externally-generated log entries (such as those produced by
 * mark_unfound_lost_delete) to this PG's log and fold their effects into
 * the local missing set and pg info.
 *
 * @param entries  non-empty list of entries, all strictly newer than
 *                 info.last_update (asserted below)
 * @param t        transaction to which rollback/stash/remove side effects
 *                 and the dirtied pg info are written
 */
void PG::append_log_entries_update_missing(
const list<pg_log_entry_t> &entries,
ObjectStore::Transaction &t)
{
dout(10) << __func__ << dendl;
assert(!entries.empty());
assert(entries.begin()->version > info.last_update);

// The handler accumulates the on-disk side effects (rollback, stash,
// remove) implied by the new entries; apply() flushes them into t.
PGLogEntryHandler rollbacker;
pg_log.append_new_log_entries(
info.last_backfill,
info.last_backfill_bitwise,
entries,
&rollbacker);
rollbacker.apply(this, &t);
info.last_update = pg_log.get_head();

if (pg_log.get_missing().num_missing() == 0) {
// advance last_complete since nothing else is missing!
info.last_complete = info.last_update;
}

// These entries mutate object state without recomputing full stats, so
// the cached stats can no longer be trusted until a scrub/stat rebuild.
info.stats.stats_invalid = true;
dirty_info = true;
write_if_dirty(t);
}


void PG::merge_new_log_entries(
const list<pg_log_entry_t> &entries,
ObjectStore::Transaction &t)
{
dout(10) << __func__ << " " << entries << dendl;
assert(is_primary());

set<pg_shard_t>::const_iterator a = actingbackfill.begin();
assert(a != actingbackfill.end());
set<pg_shard_t>::const_iterator end = actingbackfill.end();
while (a != end) {
pg_shard_t peer(*a);
++a;
append_log_entries_update_missing(entries, t);
for (set<pg_shard_t>::const_iterator i = actingbackfill.begin();
i != actingbackfill.end();
++i) {
pg_shard_t peer(*i);
if (peer == pg_whoami) continue;
assert(peer_missing.count(peer));
assert(peer_info.count(peer));
pg_missing_t& pmissing(peer_missing[peer]);
pg_info_t& pinfo(peer_info[peer]);

MOSDPGLog *m = new MOSDPGLog(
peer.shard, pg_whoami.shard,
info.last_update.epoch, info);
m->log.copy_after(pg_log.get_log(), pinfo.last_update);

for (list<pg_log_entry_t>::const_iterator i = m->log.log.begin();
i != m->log.log.end();
++i) {
pmissing.add_next_event(*i);
}
pinfo.last_update = m->log.head;

osd->send_message_osd_cluster(peer.osd, m, get_osdmap()->get_epoch());
PGLog::append_log_entries_update_missing(
pinfo.last_backfill,
info.last_backfill_bitwise,
entries,
NULL,
pmissing,
NULL,
this);
pinfo.last_update = info.last_update;
pinfo.stats.stats_invalid = true;
}
for (auto &&i: entries) {
missing_loc.rebuild(
i.soid,
get_sort_bitwise(),
pg_whoami,
actingbackfill,
info,
pg_log.get_missing(),
peer_missing,
peer_info);
}
}

Expand Down Expand Up @@ -6640,7 +6672,6 @@ boost::statechart::result PG::RecoveryState::Active::react(const ActMap&)
if (pg->cct->_conf->osd_auto_mark_unfound_lost) {
pg->osd->clog->error() << pg->info.pgid.pgid << " has " << unfound
<< " objects unfound and apparently lost, would automatically marking lost but NOT IMPLEMENTED\n";
//pg->mark_all_unfound_lost(*context< RecoveryMachine >().get_cur_transaction());
} else
pg->osd->clog->error() << pg->info.pgid.pgid << " has " << unfound << " objects unfound and apparently lost\n";
}
Expand Down
83 changes: 78 additions & 5 deletions src/osd/PG.h
Expand Up @@ -423,6 +423,55 @@ class PG : DoutPrefixProvider {
missing_loc.erase(hoid);
}

/// Rebuild needs_recovery_map/missing_loc for hoid after its missing
/// state changed (e.g. after new log entries were merged).
///
/// @param hoid         object whose recovery bookkeeping is rebuilt
/// @param sort_bitwise current sort order, used when comparing hoid
///                     against a peer's last_backfill
/// @param self         this OSD's own shard
/// @param to_recover   shards consulted for an authoritative missing item
/// @param info         primary's pg_info (must not be backfilling; asserted)
/// @param missing      primary's missing set
/// @param pmissing     per-peer missing sets
/// @param pinfo        per-peer pg_info (must cover every key of pmissing)
void rebuild(
  const hobject_t &hoid,
  bool sort_bitwise,
  pg_shard_t self,
  // by const reference: the original took the set by value, copying it
  // on every call (rebuild is invoked once per log entry)
  const set<pg_shard_t> &to_recover,
  const pg_info_t &info,
  const pg_missing_t &missing,
  const map<pg_shard_t, pg_missing_t> &pmissing,
  const map<pg_shard_t, pg_info_t> &pinfo) {
  // Drop any stale entries for hoid; they are recomputed from scratch.
  recovered(hoid);
  boost::optional<pg_missing_t::item> item;
  auto miter = missing.missing.find(hoid);
  if (miter != missing.missing.end()) {
    item = miter->second;
  } else {
    // Not missing on the primary; look for the needed version on one of
    // the to_recover shards.
    for (auto &&i: to_recover) {
      if (i == self)
        continue;
      auto pmiter = pmissing.find(i);
      assert(pmiter != pmissing.end());
      miter = pmiter->second.missing.find(hoid);
      if (miter != pmiter->second.missing.end()) {
        item = miter->second;
        break;
      }
    }
  }
  if (!item)
    return; // recovered!

  needs_recovery_map[hoid] = *item;
  auto mliter =
    missing_loc.insert(make_pair(hoid, set<pg_shard_t>())).first;
  assert(info.last_backfill == hobject_t::get_max());
  assert(info.last_update >= item->need);
  // The primary itself is a location iff it is not missing the object.
  if (!missing.is_missing(hoid))
    mliter->second.insert(self);
  // A peer is a usable location if its log covers the needed version,
  // hoid is within its backfilled range, and it is not missing hoid.
  for (auto &&i: pmissing) {
    auto pinfoiter = pinfo.find(i.first);
    assert(pinfoiter != pinfo.end());
    if (item->need <= pinfoiter->second.last_update &&
        cmp(hoid, pinfoiter->second.last_backfill, sort_bitwise) <= 0 &&
        !i.second.is_missing(hoid))
      mliter->second.insert(i.first);
  }
}

const set<pg_shard_t> &get_locations(const hobject_t &hoid) const {
return missing_loc.count(hoid) ?
missing_loc.find(hoid)->second : empty_set;
Expand Down Expand Up @@ -854,7 +903,6 @@ class PG : DoutPrefixProvider {
bool adjust_need_up_thru(const OSDMapRef osdmap);

bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const;
virtual void mark_all_unfound_lost(int how) = 0;
virtual void dump_recovery_info(Formatter *f) const = 0;

bool calc_min_last_complete_ondisk() {
Expand Down Expand Up @@ -920,11 +968,15 @@ class PG : DoutPrefixProvider {
list<pg_log_entry_t> to_rollback;
set<hobject_t, hobject_t::BitwiseComparator> to_remove;
list<pg_log_entry_t> to_trim;
list<pair<hobject_t, version_t> > to_stash;

// LogEntryHandler
void remove(const hobject_t &hoid) {
to_remove.insert(hoid);
}
// Record a deferred stash of hoid at version v; executed later in
// apply(), which forwards each pair to PGBackend::try_stash().
void try_stash(const hobject_t &hoid, version_t v) {
to_stash.push_back(make_pair(hoid, v));
}
void rollback(const pg_log_entry_t &entry) {
to_rollback.push_back(entry);
}
Expand All @@ -941,6 +993,11 @@ class PG : DoutPrefixProvider {
SnapRollBacker rollbacker(j->soid, pg, t);
j->mod_desc.visit(&rollbacker);
}
for (list<pair<hobject_t, version_t> >::iterator i = to_stash.begin();
i != to_stash.end();
++i) {
pg->get_pgbackend()->try_stash(i->first, i->second, t);
}
for (set<hobject_t, hobject_t::BitwiseComparator>::iterator i = to_remove.begin();
i != to_remove.end();
++i) {
Expand Down Expand Up @@ -2218,8 +2275,19 @@ class PG : DoutPrefixProvider {

/// share pg info after a pg is active
void share_pg_info();
/// share new pg log entries after a pg is active
void share_pg_log();


void append_log_entries_update_missing(
const list<pg_log_entry_t> &entries,
ObjectStore::Transaction &t);

/**
* Merge entries updating missing as necessary on all
* actingbackfill logs and missings (also missing_loc)
*/
void merge_new_log_entries(
const list<pg_log_entry_t> &entries,
ObjectStore::Transaction &t);

void reset_interval_flush();
void start_peering_interval(
Expand Down Expand Up @@ -2315,8 +2383,13 @@ class PG : DoutPrefixProvider {
virtual void do_backfill(OpRequestRef op) = 0;
virtual void snap_trimmer(epoch_t epoch_queued) = 0;

virtual int do_command(cmdmap_t cmdmap, ostream& ss,
bufferlist& idata, bufferlist& odata) = 0;
virtual int do_command(
cmdmap_t cmdmap,
ostream& ss,
bufferlist& idata,
bufferlist& odata,
ConnectionRef conn,
ceph_tid_t tid) = 0;

virtual void on_role_change() = 0;
virtual void on_pool_change() = 0;
Expand Down
31 changes: 31 additions & 0 deletions src/osd/PGBackend.cc
Expand Up @@ -59,6 +59,12 @@ struct RollbackVisitor : public ObjectModDesc::Visitor {
temp.append(t);
temp.swap(t);
}
// Visitor hook for a try_rmobject mod_desc entry: emit the inverse
// operation (rollback_try_stash) into a scratch transaction.
// NOTE(review): the append/swap pair places the new ops ahead of those
// already in t — matching the other hooks here, presumably so rollback
// ops execute in reverse order of the original modifications (confirm
// against ObjectModDesc::visit ordering).
void try_rmobject(version_t old_version) {
ObjectStore::Transaction temp;
pg->rollback_try_stash(hoid, old_version, &temp);
temp.append(t);
temp.swap(t);
}
void create() {
ObjectStore::Transaction temp;
pg->rollback_create(hoid, &temp);
Expand All @@ -82,6 +88,17 @@ void PGBackend::rollback(
}


void PGBackend::try_stash(
const hobject_t &hoid,
version_t v,
ObjectStore::Transaction *t)
{
t->try_rename(
coll,
ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
ghobject_t(hoid, v, get_parent()->whoami_shard().shard));
}

void PGBackend::on_change_cleanup(ObjectStore::Transaction *t)
{
dout(10) << __func__ << dendl;
Expand Down Expand Up @@ -253,6 +270,20 @@ void PGBackend::rollback_stash(
ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
}

void PGBackend::rollback_try_stash(
const hobject_t &hoid,
version_t old_version,
ObjectStore::Transaction *t) {
assert(!hoid.is_temp());
t->remove(
coll,
ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
t->try_rename(
coll,
ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard),
ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
}

void PGBackend::rollback_create(
const hobject_t &hoid,
ObjectStore::Transaction *t) {
Expand Down
11 changes: 11 additions & 0 deletions src/osd/PGBackend.h
Expand Up @@ -496,6 +496,11 @@ struct shard_info_wrapper;
) = 0;


void try_stash(
const hobject_t &hoid,
version_t v,
ObjectStore::Transaction *t);

void rollback(
const hobject_t &hoid,
const ObjectModDesc &desc,
Expand All @@ -519,6 +524,12 @@ struct shard_info_wrapper;
version_t old_version,
ObjectStore::Transaction *t);

/// Unstash object to rollback stash
void rollback_try_stash(
const hobject_t &hoid,
version_t old_version,
ObjectStore::Transaction *t);

/// Delete object to rollback create
void rollback_create(
const hobject_t &hoid,
Expand Down
16 changes: 13 additions & 3 deletions src/osd/PGLog.cc
Expand Up @@ -570,6 +570,7 @@ void PGLog::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead

void PGLog::append_log_entries_update_missing(
const hobject_t &last_backfill,
bool last_backfill_bitwise,
const list<pg_log_entry_t> &entries,
IndexedLog *log,
pg_missing_t &missing,
Expand All @@ -589,12 +590,20 @@ void PGLog::append_log_entries_update_missing(
ldpp_dout(dpp, 20) << "update missing, append " << ne << dendl;
log->index(ne);
}
if (p->soid <= last_backfill) {
if (cmp(p->soid, last_backfill, last_backfill_bitwise) <= 0) {
missing.add_next_event(*p);
if (p->is_delete() && rollbacker)
rollbacker->remove(p->soid);
if (rollbacker) {
// hack to match PG::mark_all_unfound_lost
if (p->is_lost_delete() && p->mod_desc.can_rollback()) {
rollbacker->try_stash(p->soid, p->version.version);
} else if (p->is_delete()) {
rollbacker->remove(p->soid);
}
}
}
}
if (log)
log->reset_rollback_info_trimmed_to_riter();
}

void PGLog::merge_log(ObjectStore::Transaction& t,
Expand Down Expand Up @@ -708,6 +717,7 @@ void PGLog::merge_log(ObjectStore::Transaction& t,
entries.splice(entries.end(), olog.log, from, to);
append_log_entries_update_missing(
info.last_backfill,
info.last_backfill_bitwise,
entries,
&log,
missing,
Expand Down

0 comments on commit e7edf20

Please sign in to comment.