Skip to content

Commit

Permalink
Aborting snapshot might not abort snapshot of shards in very early stages in the snapshot process
Browse files Browse the repository at this point in the history

If the abort command is issued very early in the shard snapshot lifecycle, it might not cancel the snapshot of this shard. This commit backports the fix for this issue, which was discovered and fixed as part of #11756.

Closes #11839
  • Loading branch information
imotov committed Jun 26, 2015
1 parent 215c7dd commit af3944d
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 10 deletions.
Expand Up @@ -493,6 +493,11 @@ public void snapshot(SnapshotIndexCommit snapshotIndexCommit) {

snapshotStatus.files(indexNumberOfFiles, indexTotalFilesSize);

if (snapshotStatus.aborted()) {
logger.debug("[{}] [{}] Aborted during initialization", shardId, snapshotId);
throw new IndexShardSnapshotFailedException(shardId, "Aborted");
}

snapshotStatus.updateStage(IndexShardSnapshotStatus.Stage.STARTED);

for (FileInfo snapshotFileInfo : filesToSnapshot) {
Expand Down
30 changes: 20 additions & 10 deletions src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
Expand Up @@ -816,16 +816,26 @@ private void processIndexShardSnapshots(ClusterChangedEvent event) {
for (Map.Entry<ShardId, SnapshotMetaData.ShardSnapshotStatus> shard : entry.shards().entrySet()) {
IndexShardSnapshotStatus snapshotStatus = snapshotShards.shards.get(shard.getKey());
if (snapshotStatus != null) {
if (snapshotStatus.stage() == IndexShardSnapshotStatus.Stage.STARTED) {
snapshotStatus.abort();
} else if (snapshotStatus.stage() == IndexShardSnapshotStatus.Stage.DONE) {
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, updating status on the master", entry.snapshotId(), shard.getKey());
updateIndexShardSnapshotStatus(new UpdateIndexShardSnapshotStatusRequest(entry.snapshotId(), shard.getKey(),
new ShardSnapshotStatus(event.state().nodes().localNodeId(), SnapshotMetaData.State.SUCCESS)));
} else if (snapshotStatus.stage() == IndexShardSnapshotStatus.Stage.FAILURE) {
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, updating status on the master", entry.snapshotId(), shard.getKey());
updateIndexShardSnapshotStatus(new UpdateIndexShardSnapshotStatusRequest(entry.snapshotId(), shard.getKey(),
new ShardSnapshotStatus(event.state().nodes().localNodeId(), State.FAILED, snapshotStatus.failure())));
switch (snapshotStatus.stage()) {
case INIT:
case STARTED:
snapshotStatus.abort();
break;
case FINALIZE:
logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, letting it finish", entry.snapshotId(), shard.getKey());
break;
case DONE:
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, updating status on the master", entry.snapshotId(), shard.getKey());
updateIndexShardSnapshotStatus(new UpdateIndexShardSnapshotStatusRequest(entry.snapshotId(), shard.getKey(),
new ShardSnapshotStatus(event.state().nodes().localNodeId(), SnapshotMetaData.State.SUCCESS)));
break;
case FAILURE:
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, updating status on the master", entry.snapshotId(), shard.getKey());
updateIndexShardSnapshotStatus(new UpdateIndexShardSnapshotStatusRequest(entry.snapshotId(), shard.getKey(),
new ShardSnapshotStatus(event.state().nodes().localNodeId(), State.FAILED, snapshotStatus.failure())));
break;
default:
throw new IllegalStateException("Unknown snapshot shard stage " + snapshotStatus.stage());
}
}
}
Expand Down

0 comments on commit af3944d

Please sign in to comment.