Skip to content

Commit

Permalink
Failure to recover a shard might cause loss of translog data (especia…
Browse files Browse the repository at this point in the history
…lly with no replicas), closes #869.
  • Loading branch information
kimchy committed Apr 20, 2011
1 parent 3c23334 commit f5dbcb2
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 8 deletions.
Expand Up @@ -144,15 +144,24 @@ public class LocalIndexShardGateway extends AbstractIndexShardComponent implemen
try {
InputStreamStreamInput si = new InputStreamStreamInput(new FileInputStream(recoveringTranslogFile));
while (true) {
int opSize = si.readInt();
Translog.Operation operation = TranslogStreams.readTranslogOperation(si);
Translog.Operation operation;
try {
int opSize = si.readInt();
operation = TranslogStreams.readTranslogOperation(si);
} catch (EOFException e) {
// ignore, not properly written the last op
break;
} catch (IOException e) {
// ignore, not properly written last op
break;
}
recoveryStatus.translog().addTranslogOperations(1);
indexShard.performRecoveryOperation(operation);
}
} catch (EOFException e) {
// ignore this exception, its fine
} catch (IOException e) {
// ignore this as well
} catch (Throwable e) {
// we failed to recovery, make sure to delete the translog file (and keep the recovering one)
indexShard.translog().close(true);
throw new IndexShardGatewayRecoveryException(shardId, "failed to recover shard", e);
}
indexShard.performRecoveryFinalization(true);

Expand Down
Expand Up @@ -539,7 +539,7 @@ private void handleRecoveryFailure(IndexService indexService, ShardRouting shard
synchronized (mutex) {
if (indexService.hasShard(shardRouting.shardId().id())) {
try {
indexService.cleanShard(shardRouting.shardId().id(), "recovery failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
indexService.removeShard(shardRouting.shardId().id(), "recovery failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
} catch (IndexShardMissingException e) {
// the node got closed on us, ignore it
} catch (Exception e1) {
Expand Down Expand Up @@ -576,7 +576,7 @@ private class FailedEngineHandler implements Engine.FailedEngineListener {
synchronized (mutex) {
if (indexService.hasShard(shardId.id())) {
try {
indexService.cleanShard(shardId.id(), "engine failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
indexService.removeShard(shardId.id(), "engine failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
} catch (IndexShardMissingException e) {
// the node got closed on us, ignore it
} catch (Exception e1) {
Expand Down

0 comments on commit f5dbcb2

Please sign in to comment.