Skip to content

Commit

Permalink
Merge branch 'wip-16980' into wip-mgolub-testing
Browse files Browse the repository at this point in the history
rbd-mirror: potential race condition during failure shutdown #10667
  • Loading branch information
Mykola Golub committed Aug 11, 2016
2 parents f7bfba8 + 74ec7c9 commit 67612b3
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/librbd/parent_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace librbd {
std::string image_id;
snapid_t snap_id;
parent_spec() : pool_id(-1), snap_id(CEPH_NOSNAP) {}
parent_spec(uint64_t pool_id, std::string image_id, snapid_t snap_id) :
parent_spec(int64_t pool_id, std::string image_id, snapid_t snap_id) :
pool_id(pool_id), image_id(image_id), snap_id(snap_id) {}
bool operator==(const parent_spec &other) {
return ((this->pool_id == other.pool_id) &&
Expand Down
125 changes: 117 additions & 8 deletions src/test/rbd_mirror/test_ImageReplayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
#include "journal/Journaler.h"
#include "librbd/AioCompletion.h"
#include "librbd/AioImageRequestWQ.h"
#include "librbd/ExclusiveLock.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageState.h"
#include "librbd/Journal.h"
#include "librbd/Operations.h"
#include "librbd/Utils.h"
#include "librbd/internal.h"
#include "tools/rbd_mirror/types.h"
Expand Down Expand Up @@ -115,12 +117,7 @@ class TestImageReplayer : public ::testing::Test {

~TestImageReplayer()
{
if (m_watch_handle != 0) {
m_remote_ioctx.unwatch2(m_watch_handle);
delete m_watch_ctx;
m_watch_ctx = nullptr;
m_watch_handle = 0;
}
unwatch();

delete m_replayer;
delete m_threads;
Expand Down Expand Up @@ -150,14 +147,18 @@ class TestImageReplayer : public ::testing::Test {
ASSERT_EQ(0, m_remote_ioctx.watch2(oid, &m_watch_handle, m_watch_ctx));
}

void stop()
{
void unwatch() {
if (m_watch_handle != 0) {
m_remote_ioctx.unwatch2(m_watch_handle);
delete m_watch_ctx;
m_watch_ctx = nullptr;
m_watch_handle = 0;
}
}

void stop()
{
unwatch();

C_SaferCond cond;
m_replayer->stop(&cond);
Expand Down Expand Up @@ -285,6 +286,16 @@ class TestImageReplayer : public ::testing::Test {
ASSERT_EQ(master_position, mirror_position);
}

void wait_for_stopped() {
for (int i = 0; i < 100; i++) {
if (m_replayer->is_stopped()) {
break;
}
wait_for_watcher_notify(1);
}
ASSERT_TRUE(m_replayer->is_stopped());
}

void write_test_data(librbd::ImageCtx *ictx, const char *test_data, off_t off,
size_t len)
{
Expand Down Expand Up @@ -713,3 +724,101 @@ TEST_F(TestImageReplayer, Resync_StartInterrupted)

stop();
}

TEST_F(TestImageReplayer, MultipleReplayFailures_SingleEpoch) {
bootstrap();

// inject a snapshot that cannot be unprotected
librbd::ImageCtx *ictx;
open_image(m_local_ioctx, m_image_name, false, &ictx);
ictx->features &= ~RBD_FEATURE_JOURNALING;
ASSERT_EQ(0, ictx->operations->snap_create("foo"));
ASSERT_EQ(0, ictx->operations->snap_protect("foo"));
ASSERT_EQ(0, librbd::cls_client::add_child(&ictx->md_ctx, RBD_CHILDREN,
{ictx->md_ctx.get_id(),
ictx->id, ictx->snap_ids["foo"]},
"dummy child id"));
close_image(ictx);

// race failed op shut down with new ops
open_remote_image(&ictx);
for (uint64_t i = 0; i < 10; ++i) {
RWLock::RLocker owner_locker(ictx->owner_lock);
C_SaferCond request_lock;
ictx->exclusive_lock->request_lock(&request_lock);
ASSERT_EQ(0, request_lock.wait());

C_SaferCond append_ctx;
ictx->journal->append_op_event(
i,
librbd::journal::EventEntry{
librbd::journal::SnapUnprotectEvent{i, "foo"}},
&append_ctx);
ASSERT_EQ(0, append_ctx.wait());

C_SaferCond commit_ctx;
ictx->journal->commit_op_event(i, 0, &commit_ctx);
ASSERT_EQ(0, commit_ctx.wait());

C_SaferCond release_ctx;
ictx->exclusive_lock->release_lock(&release_ctx);
ASSERT_EQ(0, release_ctx.wait());
}

for (uint64_t i = 0; i < 5; ++i) {
start();
wait_for_stopped();
unwatch();
}
}

TEST_F(TestImageReplayer, MultipleReplayFailures_MultiEpoch) {
bootstrap();

// inject a snapshot that cannot be unprotected
librbd::ImageCtx *ictx;
open_image(m_local_ioctx, m_image_name, false, &ictx);
ictx->features &= ~RBD_FEATURE_JOURNALING;
ASSERT_EQ(0, ictx->operations->snap_create("foo"));
ASSERT_EQ(0, ictx->operations->snap_protect("foo"));
ASSERT_EQ(0, librbd::cls_client::add_child(&ictx->md_ctx, RBD_CHILDREN,
{ictx->md_ctx.get_id(),
ictx->id, ictx->snap_ids["foo"]},
"dummy child id"));
close_image(ictx);

// race failed op shut down with new tag flush
open_remote_image(&ictx);
{
RWLock::RLocker owner_locker(ictx->owner_lock);
C_SaferCond request_lock;
ictx->exclusive_lock->request_lock(&request_lock);
ASSERT_EQ(0, request_lock.wait());

C_SaferCond append_ctx;
ictx->journal->append_op_event(
1U,
librbd::journal::EventEntry{
librbd::journal::SnapUnprotectEvent{1U, "foo"}},
&append_ctx);
ASSERT_EQ(0, append_ctx.wait());

C_SaferCond commit_ctx;
ictx->journal->commit_op_event(1U, 0, &commit_ctx);
ASSERT_EQ(0, commit_ctx.wait());

C_SaferCond release_ctx;
ictx->exclusive_lock->release_lock(&release_ctx);
ASSERT_EQ(0, release_ctx.wait());
}

write_test_data(ictx, m_test_data, 0, TEST_IO_SIZE);

for (uint64_t i = 0; i < 5; ++i) {
start();
wait_for_stopped();
unwatch();
}
close_image(ictx);
}

4 changes: 4 additions & 0 deletions src/tools/rbd_mirror/ImageReplayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,10 @@ void ImageReplayer<I>::replay_flush() {

{
Mutex::Locker locker(m_lock);
if (m_state != STATE_REPLAYING) {
dout(20) << "replay interrupted" << dendl;
return;
}
m_state = STATE_REPLAY_FLUSHING;
}

Expand Down

0 comments on commit 67612b3

Please sign in to comment.