Skip to content

Commit

Permalink
Merge pull request #10047 from dachary/wip-16426-jewel
Browse files (browse the repository at this point in the history)
jewel: Possible race condition during journal transition from replay to ready

Reviewed-by: Mykola Golub <mgolub@mirantis.com>
  • Loading branch information
Loic Dachary committed Aug 9, 2016
2 parents 4fe02a6 + 70bf746 commit 64d5ff9
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
22 changes: 13 additions & 9 deletions src/librbd/journal/Replay.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ Replay<I>::~Replay() {
assert(m_aio_modify_unsafe_contexts.empty());
assert(m_aio_modify_safe_contexts.empty());
assert(m_op_events.empty());
assert(m_in_flight_op_events == 0);
}

template <typename I>
Expand Down Expand Up @@ -208,7 +209,7 @@ void Replay<I>::shut_down(bool cancel_ops, Context *on_finish) {
}

assert(m_flush_ctx == nullptr);
if (!m_op_events.empty() || flush_comp != nullptr) {
if (m_in_flight_op_events > 0 || flush_comp != nullptr) {
std::swap(m_flush_ctx, on_finish);
}
}
Expand Down Expand Up @@ -664,7 +665,7 @@ void Replay<I>::handle_aio_flush_complete(Context *on_flush_safe,
m_in_flight_aio_modify -= on_safe_ctxs.size();

std::swap(on_aio_ready, m_on_aio_ready);
if (m_op_events.empty() &&
if (m_in_flight_op_events == 0 &&
(m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) {
on_flush = m_flush_ctx;
}
Expand Down Expand Up @@ -716,6 +717,7 @@ Context *Replay<I>::create_op_context_callback(uint64_t op_tid,
return nullptr;
}

++m_in_flight_op_events;
*op_event = &m_op_events[op_tid];
(*op_event)->on_start_safe = on_safe;

Expand All @@ -731,7 +733,6 @@ void Replay<I>::handle_op_complete(uint64_t op_tid, int r) {
<< "r=" << r << dendl;

OpEvent op_event;
Context *on_flush = nullptr;
bool shutting_down = false;
{
Mutex::Locker locker(m_lock);
Expand All @@ -742,10 +743,6 @@ void Replay<I>::handle_op_complete(uint64_t op_tid, int r) {
m_op_events.erase(op_it);

shutting_down = (m_flush_ctx != nullptr);
if (m_op_events.empty() &&
(m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) {
on_flush = m_flush_ctx;
}
}

assert(op_event.on_start_ready == nullptr || (r < 0 && r != -ERESTART));
Expand Down Expand Up @@ -778,8 +775,15 @@ void Replay<I>::handle_op_complete(uint64_t op_tid, int r) {
if (op_event.on_finish_safe != nullptr) {
op_event.on_finish_safe->complete(r);
}
if (on_flush != nullptr) {
on_flush->complete(0);

// shut down request might have occurred while lock was
// dropped -- handle if pending
Mutex::Locker locker(m_lock);
assert(m_in_flight_op_events > 0);
--m_in_flight_op_events;
if (m_flush_ctx != nullptr && m_in_flight_op_events == 0 &&
(m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) {
m_image_ctx.op_work_queue->queue(m_flush_ctx, 0);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/librbd/journal/Replay.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ class Replay {
ContextSet m_aio_modify_safe_contexts;

OpEvents m_op_events;
uint64_t m_in_flight_op_events = 0;

Context *m_flush_ctx = nullptr;
Context *m_on_aio_ready = nullptr;
Expand Down

0 comments on commit 64d5ff9

Please sign in to comment.