Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

osd: fixes to make rbd on ec work #12305

Merged
merged 14 commits into from
Dec 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/include/interval_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,11 @@ class interval_set {
swap(a);
union_of(a, b);
}
void union_insert(T off, T len) {
interval_set a;
a.insert(off, len);
union_of(a);
}

bool subset_of(const interval_set &big) const {
for (typename std::map<T,T>::const_iterator i = m.begin();
Expand Down
13 changes: 8 additions & 5 deletions src/osd/ECBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1736,7 +1736,7 @@ bool ECBackend::try_state_to_reads()
Op *op = &(waiting_state.front());
if (op->requires_rmw() && pipeline_state.cache_invalid()) {
dout(20) << __func__ << ": blocking " << *op
<< " because it requires an rmw and the cache is invalid"
<< " because it requires an rmw and the cache is invalid "
<< pipeline_state
<< dendl;
return false;
Expand All @@ -1746,12 +1746,15 @@ bool ECBackend::try_state_to_reads()
dout(20) << __func__ << ": invalidating cache after this op"
<< dendl;
pipeline_state.invalidate();
op->using_cache = false;
} else {
op->using_cache = pipeline_state.caching_enabled();
}

waiting_state.pop_front();
waiting_reads.push_back(*op);

if (op->requires_rmw() || pipeline_state.caching_enabled()) {
if (op->using_cache) {
cache.open_write_pin(op->pin);

extent_set empty;
Expand Down Expand Up @@ -1813,7 +1816,7 @@ bool ECBackend::try_reads_to_commit()
op->hoid,
op->delta_stats);

if (pipeline_state.caching_enabled() || op->requires_rmw()) {
if (op->using_cache) {
for (auto &&hpair: op->pending_read) {
op->remote_read_result[hpair.first].insert(
cache.get_remaining_extents_for_rmw(
Expand Down Expand Up @@ -1872,7 +1875,7 @@ bool ECBackend::try_reads_to_commit()
dout(20) << __func__ << ": written_set: " << written_set << dendl;
assert(written_set == op->plan.will_write);

if (pipeline_state.caching_enabled() || op->requires_rmw()) {
if (op->using_cache) {
for (auto &&hpair: written) {
dout(20) << __func__ << ": " << hpair << dendl;
cache.present_rmw_update(hpair.first, op->pin, hpair.second);
Expand Down Expand Up @@ -1972,7 +1975,7 @@ bool ECBackend::try_finish_rmw()
}
}

if (pipeline_state.caching_enabled() || op->requires_rmw()) {
if (op->using_cache) {
cache.release_write_pin(op->pin);
}
tid_to_op_map.erase(op->tid);
Expand Down
3 changes: 3 additions & 0 deletions src/osd/ECBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,9 @@ class ECBackend : public PGBackend {
bool requires_rmw() const { return !plan.to_read.empty(); }
bool invalidates_cache() const { return plan.invalidates_cache; }

// must be true if requires_rmw(), must be false if invalidates_cache()
bool using_cache = false;

/// In progress read state;
hobject_t::bitwisemap<extent_set> pending_read; // subset already being read
hobject_t::bitwisemap<extent_set> remote_read; // subset we must read
Expand Down
28 changes: 17 additions & 11 deletions src/osd/ECTransaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,55 +73,61 @@ namespace ECTransaction {
i.second.truncate->first < projected_size) {
if (!(sinfo.logical_offset_is_stripe_aligned(
i.second.truncate->first))) {
plan.to_read[i.first].insert(
plan.to_read[i.first].union_insert(
sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
sinfo.get_stripe_width());

ldpp_dout(dpp, 20) << __func__ << ": unaligned truncate" << dendl;

will_write.insert(
will_write.union_insert(
sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
sinfo.get_stripe_width());
}
projected_size = sinfo.logical_to_next_stripe_offset(
i.second.truncate->first);
}

extent_set raw_write_set;
for (auto &&extent: i.second.buffer_updates) {
using BufferUpdate = PGTransaction::ObjectOperation::BufferUpdate;
if (boost::get<BufferUpdate::CloneRange>(&(extent.get_val()))) {
assert(
0 ==
"CloneRange is not allowed, do_op should have returned ENOTSUPP");
}
raw_write_set.insert(extent.get_off(), extent.get_len());
}

for (auto extent = raw_write_set.begin();
extent != raw_write_set.end();
++extent) {
uint64_t head_start =
sinfo.logical_to_prev_stripe_offset(extent.get_off());
sinfo.logical_to_prev_stripe_offset(extent.get_start());
uint64_t head_finish =
sinfo.logical_to_next_stripe_offset(extent.get_off());
sinfo.logical_to_next_stripe_offset(extent.get_start());
if (head_start > projected_size) {
head_start = projected_size;
}
if (head_start != head_finish &&
head_start < projected_size) {
assert(head_finish <= projected_size);
assert(head_finish - head_start == sinfo.get_stripe_width());
plan.to_read[i.first].insert(
plan.to_read[i.first].union_insert(
head_start, sinfo.get_stripe_width());
}

uint64_t tail_start =
sinfo.logical_to_prev_stripe_offset(
extent.get_off() + extent.get_len());
extent.get_start() + extent.get_len());
uint64_t tail_finish =
sinfo.logical_to_next_stripe_offset(
extent.get_off() + extent.get_len());
extent.get_start() + extent.get_len());
if (tail_start != tail_finish &&
tail_start != head_start &&
(head_start == head_finish || tail_start != head_start) &&
tail_start < projected_size) {
assert(tail_finish <= projected_size);
assert(tail_finish - tail_start == sinfo.get_stripe_width());
plan.to_read[i.first].insert(
plan.to_read[i.first].union_insert(
tail_start, sinfo.get_stripe_width());
}

Expand All @@ -130,7 +136,7 @@ namespace ECTransaction {
sinfo.logical_offset_is_stripe_aligned(
tail_finish - head_start)
);
will_write.insert(
will_write.union_insert(
head_start, tail_finish - head_start);
if (tail_finish > projected_size)
projected_size = tail_finish;
Expand All @@ -146,7 +152,7 @@ namespace ECTransaction {
ldpp_dout(dpp, 20) << __func__ << ": truncating out to "
<< truncating_to
<< dendl;
will_write.insert(projected_size, truncating_to - projected_size);
will_write.union_insert(projected_size, truncating_to - projected_size);
projected_size = truncating_to;
}

Expand Down
5 changes: 4 additions & 1 deletion src/osd/ExtentCache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,20 +168,23 @@ extent_map ExtentCache::get_remaining_extents_for_rmw(
auto &eset = get_or_create(oid);
for (auto &&res: to_get) {
bufferlist bl;
uint64_t cur = res.first;
eset.traverse_update(
pin,
res.first,
res.second,
[&](uint64_t off, uint64_t len,
extent *ext, object_extent_set::update_action *action) {
assert(off == cur);
cur = off + len;
action->action = object_extent_set::update_action::NONE;
assert(ext && ext->bl && ext->pinned_by_write());
bl.substr_of(
*(ext->bl),
off - ext->offset,
len);
ret.insert(off, len, bl);
});
ret.insert(res.first, res.second, bl);
}
return ret;
}
Expand Down
11 changes: 9 additions & 2 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9262,8 +9262,15 @@ int OSD::init_op_flags(OpRequestRef& op)

// set bits based on op codes, called methods.
for (iter = m->ops.begin(); iter != m->ops.end(); ++iter) {
if (ceph_osd_op_mode_modify(iter->op.op))
op->set_write();
if (!(iter->op.op == CEPH_OSD_OP_WATCH &&
iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) {
/* This a bit odd. PING isn't actually a write. It can't
* result in an update to the object_info. PINGs also aren'ty
* replayed, so there's no reason to write out a log entry
*/
if (ceph_osd_op_mode_modify(iter->op.op))
op->set_write();
}
if (ceph_osd_op_mode_read(iter->op.op))
op->set_read();

Expand Down
16 changes: 12 additions & 4 deletions src/osd/PGTransaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ class PGTransaction {
bool is_fresh_object() const {
return boost::get<Init::None>(&init_type) == nullptr;
}
bool is_rename() const {
return boost::get<Init::Rename>(&init_type) != nullptr;
}
bool has_source(hobject_t *source = nullptr) const {
return match(
init_type,
Expand Down Expand Up @@ -295,14 +298,19 @@ class PGTransaction {
op.init_type = ObjectOperation::Init::Rename{source};
}

/// Remove
/// Remove -- must not be called on rename target
void remove(
const hobject_t &hoid ///< [in] obj to remove
) {
auto &op = get_object_op_for_modify(hoid);
assert(!op.updated_snaps);
op = ObjectOperation();
op.delete_first = true;
if (!op.is_fresh_object()) {
assert(!op.updated_snaps);
op = ObjectOperation();
op.delete_first = true;
} else {
assert(!op.is_rename());
op_map.erase(hoid); // make it a noop if it's a fresh object
}
}

void update_snaps(
Expand Down