Skip to content

Commit

Permalink
Merge pull request #12305 from athanatos/wip-ec-rbd-fixes
Browse files Browse the repository at this point in the history
osd: fixes to make rbd on ec work

Reviewed-by: Sage Weil <sage@redhat.com>
  • Loading branch information
liewegas committed Dec 5, 2016
2 parents 645119e + 9bf51f9 commit 2041a18
Show file tree
Hide file tree
Showing 10 changed files with 325 additions and 104 deletions.
5 changes: 5 additions & 0 deletions src/include/interval_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,11 @@ class interval_set {
swap(a);
union_of(a, b);
}
void union_insert(T off, T len) {
interval_set a;
a.insert(off, len);
union_of(a);
}

bool subset_of(const interval_set &big) const {
for (typename std::map<T,T>::const_iterator i = m.begin();
Expand Down
13 changes: 8 additions & 5 deletions src/osd/ECBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1732,7 +1732,7 @@ bool ECBackend::try_state_to_reads()
if (op->requires_rmw() && pipeline_state.cache_invalid()) {
assert(get_parent()->get_pool().is_hacky_ecoverwrites());
dout(20) << __func__ << ": blocking " << *op
<< " because it requires an rmw and the cache is invalid"
<< " because it requires an rmw and the cache is invalid "
<< pipeline_state
<< dendl;
return false;
Expand All @@ -1742,12 +1742,15 @@ bool ECBackend::try_state_to_reads()
dout(20) << __func__ << ": invalidating cache after this op"
<< dendl;
pipeline_state.invalidate();
op->using_cache = false;
} else {
op->using_cache = pipeline_state.caching_enabled();
}

waiting_state.pop_front();
waiting_reads.push_back(*op);

if (op->requires_rmw() || pipeline_state.caching_enabled()) {
if (op->using_cache) {
cache.open_write_pin(op->pin);

extent_set empty;
Expand Down Expand Up @@ -1812,7 +1815,7 @@ bool ECBackend::try_reads_to_commit()
op->hoid,
op->delta_stats);

if (pipeline_state.caching_enabled() || op->requires_rmw()) {
if (op->using_cache) {
for (auto &&hpair: op->pending_read) {
op->remote_read_result[hpair.first].insert(
cache.get_remaining_extents_for_rmw(
Expand Down Expand Up @@ -1871,7 +1874,7 @@ bool ECBackend::try_reads_to_commit()
dout(20) << __func__ << ": written_set: " << written_set << dendl;
assert(written_set == op->plan.will_write);

if (pipeline_state.caching_enabled() || op->requires_rmw()) {
if (op->using_cache) {
for (auto &&hpair: written) {
dout(20) << __func__ << ": " << hpair << dendl;
cache.present_rmw_update(hpair.first, op->pin, hpair.second);
Expand Down Expand Up @@ -1971,7 +1974,7 @@ bool ECBackend::try_finish_rmw()
}
}

if (pipeline_state.caching_enabled() || op->requires_rmw()) {
if (op->using_cache) {
cache.release_write_pin(op->pin);
}
tid_to_op_map.erase(op->tid);
Expand Down
3 changes: 3 additions & 0 deletions src/osd/ECBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,9 @@ class ECBackend : public PGBackend {
bool requires_rmw() const { return !plan.to_read.empty(); }
bool invalidates_cache() const { return plan.invalidates_cache; }

// must be true if requires_rmw(), must be false if invalidates_cache()
bool using_cache = false;

/// In progress read state;
hobject_t::bitwisemap<extent_set> pending_read; // subset already being read
hobject_t::bitwisemap<extent_set> remote_read; // subset we must read
Expand Down
28 changes: 17 additions & 11 deletions src/osd/ECTransaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,55 +73,61 @@ namespace ECTransaction {
i.second.truncate->first < projected_size) {
if (!(sinfo.logical_offset_is_stripe_aligned(
i.second.truncate->first))) {
plan.to_read[i.first].insert(
plan.to_read[i.first].union_insert(
sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
sinfo.get_stripe_width());

ldpp_dout(dpp, 20) << __func__ << ": unaligned truncate" << dendl;

will_write.insert(
will_write.union_insert(
sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
sinfo.get_stripe_width());
}
projected_size = sinfo.logical_to_next_stripe_offset(
i.second.truncate->first);
}

extent_set raw_write_set;
for (auto &&extent: i.second.buffer_updates) {
using BufferUpdate = PGTransaction::ObjectOperation::BufferUpdate;
if (boost::get<BufferUpdate::CloneRange>(&(extent.get_val()))) {
assert(
0 ==
"CloneRange is not allowed, do_op should have returned ENOTSUPP");
}
raw_write_set.insert(extent.get_off(), extent.get_len());
}

for (auto extent = raw_write_set.begin();
extent != raw_write_set.end();
++extent) {
uint64_t head_start =
sinfo.logical_to_prev_stripe_offset(extent.get_off());
sinfo.logical_to_prev_stripe_offset(extent.get_start());
uint64_t head_finish =
sinfo.logical_to_next_stripe_offset(extent.get_off());
sinfo.logical_to_next_stripe_offset(extent.get_start());
if (head_start > projected_size) {
head_start = projected_size;
}
if (head_start != head_finish &&
head_start < projected_size) {
assert(head_finish <= projected_size);
assert(head_finish - head_start == sinfo.get_stripe_width());
plan.to_read[i.first].insert(
plan.to_read[i.first].union_insert(
head_start, sinfo.get_stripe_width());
}

uint64_t tail_start =
sinfo.logical_to_prev_stripe_offset(
extent.get_off() + extent.get_len());
extent.get_start() + extent.get_len());
uint64_t tail_finish =
sinfo.logical_to_next_stripe_offset(
extent.get_off() + extent.get_len());
extent.get_start() + extent.get_len());
if (tail_start != tail_finish &&
tail_start != head_start &&
(head_start == head_finish || tail_start != head_start) &&
tail_start < projected_size) {
assert(tail_finish <= projected_size);
assert(tail_finish - tail_start == sinfo.get_stripe_width());
plan.to_read[i.first].insert(
plan.to_read[i.first].union_insert(
tail_start, sinfo.get_stripe_width());
}

Expand All @@ -130,7 +136,7 @@ namespace ECTransaction {
sinfo.logical_offset_is_stripe_aligned(
tail_finish - head_start)
);
will_write.insert(
will_write.union_insert(
head_start, tail_finish - head_start);
if (tail_finish > projected_size)
projected_size = tail_finish;
Expand All @@ -146,7 +152,7 @@ namespace ECTransaction {
ldpp_dout(dpp, 20) << __func__ << ": truncating out to "
<< truncating_to
<< dendl;
will_write.insert(projected_size, truncating_to - projected_size);
will_write.union_insert(projected_size, truncating_to - projected_size);
projected_size = truncating_to;
}

Expand Down
5 changes: 4 additions & 1 deletion src/osd/ExtentCache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,20 +168,23 @@ extent_map ExtentCache::get_remaining_extents_for_rmw(
auto &eset = get_or_create(oid);
for (auto &&res: to_get) {
bufferlist bl;
uint64_t cur = res.first;
eset.traverse_update(
pin,
res.first,
res.second,
[&](uint64_t off, uint64_t len,
extent *ext, object_extent_set::update_action *action) {
assert(off == cur);
cur = off + len;
action->action = object_extent_set::update_action::NONE;
assert(ext && ext->bl && ext->pinned_by_write());
bl.substr_of(
*(ext->bl),
off - ext->offset,
len);
ret.insert(off, len, bl);
});
ret.insert(res.first, res.second, bl);
}
return ret;
}
Expand Down
11 changes: 9 additions & 2 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9269,8 +9269,15 @@ int OSD::init_op_flags(OpRequestRef& op)

// set bits based on op codes, called methods.
for (iter = m->ops.begin(); iter != m->ops.end(); ++iter) {
if (ceph_osd_op_mode_modify(iter->op.op))
op->set_write();
if (!(iter->op.op == CEPH_OSD_OP_WATCH &&
iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) {
/* This a bit odd. PING isn't actually a write. It can't
* result in an update to the object_info. PINGs also aren'ty
* replayed, so there's no reason to write out a log entry
*/
if (ceph_osd_op_mode_modify(iter->op.op))
op->set_write();
}
if (ceph_osd_op_mode_read(iter->op.op))
op->set_read();

Expand Down
16 changes: 12 additions & 4 deletions src/osd/PGTransaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ class PGTransaction {
bool is_fresh_object() const {
return boost::get<Init::None>(&init_type) == nullptr;
}
bool is_rename() const {
return boost::get<Init::Rename>(&init_type) != nullptr;
}
bool has_source(hobject_t *source = nullptr) const {
return match(
init_type,
Expand Down Expand Up @@ -298,14 +301,19 @@ class PGTransaction {
op.init_type = ObjectOperation::Init::Rename{source};
}

/// Remove
/// Remove -- must not be called on rename target
void remove(
const hobject_t &hoid ///< [in] obj to remove
) {
auto &op = get_object_op_for_modify(hoid);
assert(!op.updated_snaps);
op = ObjectOperation();
op.delete_first = true;
if (!op.is_fresh_object()) {
assert(!op.updated_snaps);
op = ObjectOperation();
op.delete_first = true;
} else {
assert(!op.is_rename());
op_map.erase(hoid); // make it a noop if it's a fresh object
}
}

void update_snaps(
Expand Down

0 comments on commit 2041a18

Please sign in to comment.