Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync up snapshot shard status on a master restart #11450

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
65 changes: 60 additions & 5 deletions src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
Expand Up @@ -549,13 +549,18 @@ public void clusterChanged(ClusterChangedEvent event) {

if (prev == null) {
if (curr != null) {
processIndexShardSnapshots(curr);
processIndexShardSnapshots(event);
}
} else {
if (!prev.equals(curr)) {
processIndexShardSnapshots(curr);
processIndexShardSnapshots(event);
}
}
if (event.state().nodes().masterNodeId() != null &&
event.state().nodes().masterNodeId().equals(event.previousState().nodes().masterNodeId()) == false) {
syncShardStatsOnNewMaster(event);
}

} catch (Throwable t) {
logger.warn("Failed to update snapshot state ", t);
}
Expand Down Expand Up @@ -778,9 +783,10 @@ private boolean removedNodesCleanupNeeded(ClusterChangedEvent event) {
/**
* Checks if any new shards should be snapshotted on this node
*
* @param snapshotMetaData snapshot metadata to be processed
* @param event cluster state changed event
*/
private void processIndexShardSnapshots(SnapshotMetaData snapshotMetaData) {
private void processIndexShardSnapshots(ClusterChangedEvent event) {
SnapshotMetaData snapshotMetaData = event.state().metaData().custom(SnapshotMetaData.TYPE);
Map<SnapshotId, SnapshotShards> survivors = newHashMap();
// First, remove snapshots that are no longer there
for (Map.Entry<SnapshotId, SnapshotShards> entry : shardSnapshots.entrySet()) {
Expand Down Expand Up @@ -830,7 +836,17 @@ private void processIndexShardSnapshots(SnapshotMetaData snapshotMetaData) {
for (Map.Entry<ShardId, SnapshotMetaData.ShardSnapshotStatus> shard : entry.shards().entrySet()) {
IndexShardSnapshotStatus snapshotStatus = snapshotShards.shards.get(shard.getKey());
if (snapshotStatus != null) {
snapshotStatus.abort();
if (snapshotStatus.stage() == IndexShardSnapshotStatus.Stage.STARTED) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we use switch case statements for this it seems to be easier to read?

snapshotStatus.abort();
} else if (snapshotStatus.stage() == IndexShardSnapshotStatus.Stage.DONE) {
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, updating status on the master", entry.snapshotId(), shard.getKey());
updateIndexShardSnapshotStatus(new UpdateIndexShardSnapshotStatusRequest(entry.snapshotId(), shard.getKey(),
new ShardSnapshotStatus(event.state().nodes().localNodeId(), SnapshotMetaData.State.SUCCESS)));
} else if (snapshotStatus.stage() == IndexShardSnapshotStatus.Stage.FAILURE) {
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, updating status on the master", entry.snapshotId(), shard.getKey());
updateIndexShardSnapshotStatus(new UpdateIndexShardSnapshotStatusRequest(entry.snapshotId(), shard.getKey(),
new ShardSnapshotStatus(event.state().nodes().localNodeId(), State.FAILED, snapshotStatus.failure())));
}
}
}
}
Expand Down Expand Up @@ -878,6 +894,45 @@ public void run() {
}
}

/**
* Checks if any shards were processed that the new master doesn't know about
* @param event
*/
private void syncShardStatsOnNewMaster(ClusterChangedEvent event) {
SnapshotMetaData snapshotMetaData = event.state().getMetaData().custom(SnapshotMetaData.TYPE);
if (snapshotMetaData == null) {
return;
}
for (SnapshotMetaData.Entry snapshot : snapshotMetaData.entries()) {
if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) {
ImmutableMap<ShardId, IndexShardSnapshotStatus> localShards = currentSnapshotShards(snapshot.snapshotId());
if (localShards != null) {
ImmutableMap<ShardId, ShardSnapshotStatus> masterShards = snapshot.shards();
for(Map.Entry<ShardId, IndexShardSnapshotStatus> localShard : localShards.entrySet()) {
ShardId shardId = localShard.getKey();
IndexShardSnapshotStatus localShardStatus = localShard.getValue();
ShardSnapshotStatus masterShard = masterShards.get(shardId);
if (masterShard != null && masterShard.state().completed() == false) {
// Master knows about the shard and thinks it has not completed
if (localShardStatus.stage() == IndexShardSnapshotStatus.Stage.DONE) {
// but we think the shard is done - we need to make new master know that the shard is done
logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard is done locally, updating status on the master", snapshot.snapshotId(), shardId);
updateIndexShardSnapshotStatus(new UpdateIndexShardSnapshotStatusRequest(snapshot.snapshotId(), shardId,
new ShardSnapshotStatus(event.state().nodes().localNodeId(), SnapshotMetaData.State.SUCCESS)));
} else if (localShard.getValue().stage() == IndexShardSnapshotStatus.Stage.FAILURE) {
// but we think the shard failed - we need to make new master know that the shard failed
logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard failed locally, updating status on master", snapshot.snapshotId(), shardId);
updateIndexShardSnapshotStatus(new UpdateIndexShardSnapshotStatusRequest(snapshot.snapshotId(), shardId,
new ShardSnapshotStatus(event.state().nodes().localNodeId(), State.FAILED, localShardStatus.failure())));

}
}
}
}
}
}
}

/**
* Updates the shard status
*
Expand Down
131 changes: 131 additions & 0 deletions src/test/java/org/elasticsearch/snapshots/AbstractSnapshotTests.java
Expand Up @@ -22,8 +22,12 @@
import com.google.common.collect.ImmutableList;

import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
import org.elasticsearch.action.admin.cluster.tasks.PendingClusterTasksResponse;
import org.elasticsearch.cluster.*;
import org.elasticsearch.cluster.metadata.SnapshotId;
import org.elasticsearch.cluster.metadata.SnapshotMetaData;
import org.elasticsearch.cluster.service.PendingClusterTask;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.repositories.RepositoriesService;
Expand All @@ -37,9 +41,12 @@
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;

/**
*/
Expand Down Expand Up @@ -121,4 +128,128 @@ public static String blockNodeWithIndex(String index) {
public static void unblockNode(String node) {
((MockRepository)internalCluster().getInstance(RepositoriesService.class, node).repository("test-repo")).unblock();
}

protected void assertBusyPendingTasks(final String taskPrefix, final int expectedCount) throws Exception {
assertBusy(new Runnable() {
@Override
public void run() {
PendingClusterTasksResponse tasks = client().admin().cluster().preparePendingClusterTasks().get();
int count = 0;
for(PendingClusterTask task : tasks) {
if (task.getSource().toString().startsWith(taskPrefix)) {
count++;
}
}
assertThat(count, greaterThanOrEqualTo(expectedCount));
}
}, 1, TimeUnit.MINUTES);
}

/**
* Cluster state task that blocks waits for the blockOn task to show up and then blocks execution not letting
* any cluster state update task to be performed unless they have priority higher then passThroughPriority.
*
* This class is useful to testing of cluster state update task batching for lower priority tasks.
*/
protected class BlockingClusterStateListener implements ClusterStateListener {

private final Predicate<ClusterChangedEvent> blockOn;
private final Predicate<ClusterChangedEvent> countOn;
private final ClusterService clusterService;
private final CountDownLatch latch;
private final Priority passThroughPriority;
private int count;
private boolean timedOut;
private final TimeValue timeout;
private long stopWaitingAt = -1;

public BlockingClusterStateListener(ClusterService clusterService, String blockOn, String countOn, Priority passThroughPriority) {
this(clusterService, blockOn, countOn, passThroughPriority, TimeValue.timeValueMinutes(1));
}

public BlockingClusterStateListener(ClusterService clusterService, final String blockOn, final String countOn, Priority passThroughPriority, TimeValue timeout) {
this.clusterService = clusterService;
this.blockOn = new Predicate<ClusterChangedEvent>() {
@Override
public boolean apply(ClusterChangedEvent clusterChangedEvent) {
return clusterChangedEvent.source().startsWith(blockOn);
}
};
this.countOn = new Predicate<ClusterChangedEvent>() {
@Override
public boolean apply(ClusterChangedEvent clusterChangedEvent) {
return clusterChangedEvent.source().startsWith(countOn);
}
};
this.latch = new CountDownLatch(1);
this.passThroughPriority = passThroughPriority;
this.timeout = timeout;

}

public void unblock() {
latch.countDown();
}

@Override
public void clusterChanged(ClusterChangedEvent event) {
if (blockOn.apply(event)) {
logger.info("blocking cluster state tasks on [{}]", event.source());
assert stopWaitingAt < 0; // Make sure we are the first time here
stopWaitingAt = System.currentTimeMillis() + timeout.getMillis();
addBlock();
}
if (countOn.apply(event)) {
count++;
}
}

private void addBlock() {
// We should block after this task - add blocking cluster state update task
clusterService.submitStateUpdateTask("test_block", passThroughPriority, new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
while(System.currentTimeMillis() < stopWaitingAt) {
for (PendingClusterTask task : clusterService.pendingTasks()) {
if (task.getSource().string().equals("test_block") == false && passThroughPriority.sameOrAfter(task.getPriority())) {
// There are other higher priority tasks in the queue and let them pass through and then set the block again
logger.info("passing through cluster state task {}", task.getSource());
addBlock();
return currentState;
}
}
try {
logger.info("waiting....");
if (latch.await(Math.min(100, timeout.millis()), TimeUnit.MILLISECONDS)){
// Done waiting - unblock
logger.info("unblocked");
return currentState;
}
logger.info("done waiting....");
} catch (InterruptedException ex) {
logger.info("interrupted....");
Thread.currentThread().interrupt();
return currentState;
}
}
timedOut = true;
return currentState;
}

@Override
public void onFailure(String source, Throwable t) {
logger.warn("failed to execute [{}]", t, source);
}
});

}

public int count() {
return count;
}

public boolean timedOut() {
return timedOut;
}
}
}
Expand Up @@ -34,13 +34,15 @@
import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse;
import org.elasticsearch.action.admin.indices.recovery.ShardRecoveryResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ProcessedClusterStateUpdateTask;
import org.elasticsearch.cluster.AbstractDiffable;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.metadata.MetaData.Custom;
import org.elasticsearch.cluster.metadata.SnapshotMetaData;
import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
import org.elasticsearch.cluster.metadata.MetaDataIndexStateService;
import org.elasticsearch.common.Nullable;
Expand All @@ -64,6 +66,7 @@
import org.elasticsearch.snapshots.mockstore.MockRepositoryModule;
import org.elasticsearch.snapshots.mockstore.MockRepositoryPlugin;
import org.elasticsearch.test.InternalTestCluster;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.test.rest.FakeRestRequest;
import org.junit.Ignore;
import org.junit.Test;
Expand Down Expand Up @@ -791,6 +794,83 @@ public void run() {
logger.info("--> done");
}

@Test
public void masterShutdownDuringSnapshotTest() throws Exception {

Settings masterSettings = settingsBuilder().put("node.data", false).build();
Settings dataSettings = settingsBuilder().put("node.master", false).build();

logger.info("--> starting two master nodes and two data nodes");
internalCluster().startNode(masterSettings);
internalCluster().startNode(masterSettings);
internalCluster().startNode(dataSettings);
internalCluster().startNode(dataSettings);

final Client client = client();

logger.info("--> creating repository");
assertAcked(client.admin().cluster().preparePutRepository("test-repo")
.setType("fs").setSettings(Settings.settingsBuilder()
.put("location", randomRepoPath())
.put("compress", randomBoolean())
.put("chunk_size", randomIntBetween(100, 1000))));

assertAcked(prepareCreate("test-idx", 0, settingsBuilder().put("number_of_shards", between(1, 20))
.put("number_of_replicas", 0)));
ensureGreen();

logger.info("--> indexing some data");
final int numdocs = randomIntBetween(10, 100);
IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs];
for (int i = 0; i < builders.length; i++) {
builders[i] = client().prepareIndex("test-idx", "type1", Integer.toString(i)).setSource("field1", "bar " + i);
}
indexRandom(true, builders);
flushAndRefresh();

final int numberOfShards = getNumShards("test-idx").numPrimaries;
logger.info("number of shards: {}", numberOfShards);

final ClusterService clusterService = internalCluster().clusterService(internalCluster().getMasterName());
BlockingClusterStateListener snapshotListener = new BlockingClusterStateListener(clusterService, "update_snapshot [", "update snapshot state", Priority.HIGH);
try {
clusterService.addFirst(snapshotListener);
logger.info("--> snapshot");
dataNodeClient().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();

// Await until some updates are in pending state.
assertBusyPendingTasks("update snapshot state", 1);

logger.info("--> stopping master node");
internalCluster().stopCurrentMasterNode();

logger.info("--> unblocking snapshot execution");
snapshotListener.unblock();

logger.info("--> wait until the snapshot is done");

} finally {
clusterService.remove(snapshotListener);
}

assertBusy(new Runnable() {
@Override
public void run() {
SnapshotsStatusResponse snapshotsStatusResponse = client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap").get();
ImmutableList<SnapshotStatus> snapshotStatuses = snapshotsStatusResponse.getSnapshots();
assertEquals(1, snapshotStatuses.size());
assertTrue(snapshotStatuses.get(0).getState().completed());
}
});

GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get();
SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0);
assertEquals(SnapshotState.SUCCESS, snapshotInfo.state());
assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards());
assertEquals(0, snapshotInfo.failedShards());
}


private boolean snapshotIsDone(String repository, String snapshot) {
try {
SnapshotsStatusResponse snapshotsStatusResponse = client().admin().cluster().prepareSnapshotStatus(repository).setSnapshots(snapshot).get();
Expand Down