Skip to content

Commit

Permalink
Core: remove index.fail_on_corruption setting
Browse files Browse the repository at this point in the history
Now we always fail the engine if corruption is detected.

Closes #10092
  • Loading branch information
mikemccand committed Mar 14, 2015
1 parent caafad7 commit 7861385
Show file tree
Hide file tree
Showing 9 changed files with 5 additions and 65 deletions.
9 changes: 2 additions & 7 deletions src/main/java/org/elasticsearch/index/engine/Engine.java
Expand Up @@ -489,13 +489,8 @@ public void failEngine(String reason, Throwable failure) {
/** Check whether the engine should be failed */
protected boolean maybeFailEngine(String source, Throwable t) {
if (Lucene.isCorruptionException(t)) {
if (engineConfig.isFailEngineOnCorruption()) {
failEngine("corrupt file detected source: [" + source + "]", t);
return true;
} else {
logger.warn("corrupt file detected source: [{}] but [{}] is set to [{}]", t, source,
EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, engineConfig.isFailEngineOnCorruption());
}
failEngine("corrupt file detected source: [" + source + "]", t);
return true;
} else if (ExceptionsHelper.isOOM(t)) {
failEngine("out of memory", t);
return true;
Expand Down
22 changes: 0 additions & 22 deletions src/main/java/org/elasticsearch/index/engine/EngineConfig.java
Expand Up @@ -50,7 +50,6 @@
*/
public final class EngineConfig {
private final ShardId shardId;
private volatile boolean failEngineOnCorruption = true;
private volatile ByteSizeValue indexingBufferSize;
private final int indexConcurrency;
private volatile boolean compoundOnFlush = true;
Expand Down Expand Up @@ -98,12 +97,6 @@ public final class EngineConfig {
*/
public static final String INDEX_GC_DELETES_SETTING = "index.gc_deletes";

/**
* Index setting to enable / disable engine failures on detected index corruptions. Default is <code>true</code> / <tt>enabled</tt>.
* This setting is realtime updateable.
*/
public static final String INDEX_FAIL_ON_CORRUPTION_SETTING = "index.fail_on_corruption";

/**
* Index setting to control the initial index buffer size.
* This setting is <b>not</b> realtime updateable.
Expand Down Expand Up @@ -148,7 +141,6 @@ public EngineConfig(ShardId shardId, boolean optimizeAutoGenerateId, ThreadPool
this.indexConcurrency = indexSettings.getAsInt(EngineConfig.INDEX_CONCURRENCY_SETTING, Math.max(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, (int) (EsExecutors.boundedNumberOfProcessors(indexSettings) * 0.65)));
codecName = indexSettings.get(EngineConfig.INDEX_CODEC_SETTING, EngineConfig.DEFAULT_CODEC_NAME);
indexingBufferSize = indexSettings.getAsBytesSize(INDEX_BUFFER_SIZE_SETTING, DEFAUTL_INDEX_BUFFER_SIZE);
failEngineOnCorruption = indexSettings.getAsBoolean(INDEX_FAIL_ON_CORRUPTION_SETTING, true);
gcDeletesInMillis = indexSettings.getAsTime(INDEX_GC_DELETES_SETTING, EngineConfig.DEFAULT_GC_DELETES).millis();
}

Expand All @@ -168,13 +160,6 @@ public void setEnableGcDeletes(boolean enableGcDeletes) {
this.enableGcDeletes = enableGcDeletes;
}

/**
* Returns <code>true</code> if the engine should be failed in the case of a corrupted index. Defaults to <code>true</code>
*/
public boolean isFailEngineOnCorruption() {
return failEngineOnCorruption;
}

/**
* Returns the initial index buffer size. This setting is only read on startup and otherwise controlled by {@link org.elasticsearch.indices.memory.IndexingMemoryController}
*/
Expand Down Expand Up @@ -355,11 +340,4 @@ public void setGcDeletesInMillis(long gcDeletesInMillis) {
public void setCompoundOnFlush(boolean compoundOnFlush) {
this.compoundOnFlush = compoundOnFlush;
}

/**
* Sets if the engine should be failed in the case of a corrupted index. Defaults to <code>true</code>
*/
public void setFailEngineOnCorruption(boolean failEngineOnCorruption) {
this.failEngineOnCorruption = failEngineOnCorruption;
}
}
Expand Up @@ -1093,11 +1093,7 @@ class FailEngineOnMergeFailure implements MergeSchedulerProvider.FailureListener
@Override
public void onFailedMerge(MergePolicy.MergeException e) {
if (Lucene.isCorruptionException(e)) {
if (engineConfig.isFailEngineOnCorruption()) {
failEngine("corrupt file detected source: [merge]", e);
} else {
logger.warn("corrupt file detected source: [merge] but [{}] is set to [{}]", e, EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, engineConfig.isFailEngineOnCorruption());
}
failEngine("corrupt file detected source: [merge]", e);
} else {
failEngine("merge exception", e);
}
Expand Down
Expand Up @@ -85,7 +85,6 @@ public IndexDynamicSettingsModule() {
indexDynamicSettings.addDynamicSetting(LogDocMergePolicyProvider.INDEX_COMPOUND_FORMAT);
indexDynamicSettings.addDynamicSetting(EngineConfig.INDEX_COMPOUND_ON_FLUSH, Validator.BOOLEAN);
indexDynamicSettings.addDynamicSetting(EngineConfig.INDEX_GC_DELETES_SETTING, Validator.TIME);
indexDynamicSettings.addDynamicSetting(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, Validator.BOOLEAN);
indexDynamicSettings.addDynamicSetting(IndexShard.INDEX_FLUSH_ON_CLOSE, Validator.BOOLEAN);
indexDynamicSettings.addDynamicSetting(ShardSlowLogIndexingService.INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_WARN, Validator.TIME);
indexDynamicSettings.addDynamicSetting(ShardSlowLogIndexingService.INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_INFO, Validator.TIME);
Expand Down
7 changes: 0 additions & 7 deletions src/main/java/org/elasticsearch/index/shard/IndexShard.java
Expand Up @@ -1029,13 +1029,6 @@ public void onRefreshSettings(Settings settings) {
config.setCompoundOnFlush(compoundOnFlush);
change = true;
}

final boolean failEngineOnCorruption = settings.getAsBoolean(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, config.isFailEngineOnCorruption());
if (failEngineOnCorruption != config.isFailEngineOnCorruption()) {
logger.info("updating {} from [{}] to [{}]", EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, config.isFailEngineOnCorruption(), failEngineOnCorruption);
config.setFailEngineOnCorruption(failEngineOnCorruption);
change = true;
}
}
if (change) {
refresh("apply settings");
Expand Down
Expand Up @@ -44,11 +44,9 @@ public void testSettingsUpdate() {
final int iters = between(1, 20);
for (int i = 0; i < iters; i++) {
boolean compoundOnFlush = randomBoolean();
boolean failOnCorruption = randomBoolean();
long gcDeletes = Math.max(0, randomLong());

Settings build = ImmutableSettings.builder()
.put(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, failOnCorruption)
.put(EngineConfig.INDEX_COMPOUND_ON_FLUSH, compoundOnFlush)
.put(EngineConfig.INDEX_GC_DELETES_SETTING, gcDeletes)
.build();
Expand All @@ -61,7 +59,6 @@ public void testSettingsUpdate() {

assertEquals(engine.config().getGcDeletesInMillis(), gcDeletes);
assertEquals(engine.getGcDeletesInMillis(), gcDeletes);
assertEquals(engine.config().isFailEngineOnCorruption(), failOnCorruption);
}

Settings settings = ImmutableSettings.builder()
Expand Down
Expand Up @@ -147,7 +147,6 @@ public void run() {
defaultSettings = ImmutableSettings.builder()
.put(EngineConfig.INDEX_COMPOUND_ON_FLUSH, randomBoolean())
.put(EngineConfig.INDEX_GC_DELETES_SETTING, "1h") // make sure this doesn't kick in on us
.put(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, randomBoolean())
.put(EngineConfig.INDEX_CODEC_SETTING, codecName)
.put(EngineConfig.INDEX_CONCURRENCY_SETTING, indexConcurrency)
.build(); // TODO randomize more settings
Expand Down Expand Up @@ -681,7 +680,6 @@ public void testFailEngineOnCorruption() {
ParsedDocument doc = testParsedDocument("1", "1", "test", null, -1, -1, testDocumentWithTextField(), B_1, false);
engine.create(new Engine.Create(null, newUid("1"), doc));
engine.flush();
final boolean failEngine = defaultSettings.getAsBoolean(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, false);
final int failInPhase = randomIntBetween(1, 3);
try {
engine.recover(new Engine.RecoveryHandler() {
Expand Down Expand Up @@ -724,9 +722,9 @@ public void phase3(Translog.Snapshot snapshot) throws EngineException {
MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 2));
MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(2));
searchResult.close();
assertThat(failEngine, is(false));
fail("engine should have failed");
} catch (EngineClosedException ex) {
assertThat(failEngine, is(true));
// expected
}
}

Expand Down Expand Up @@ -1466,15 +1464,13 @@ public void testFailStart() throws IOException {
assertEquals(store.refCount(), refCount);
continue;
}
holder.config().setFailEngineOnCorruption(true);
assertEquals(store.refCount(), refCount + 1);
final int numStarts = scaledRandomIntBetween(1, 5);
for (int j = 0; j < numStarts; j++) {
try {
assertEquals(store.refCount(), refCount + 1);
holder.close();
holder = createEngine(store, translog);
holder.config().setFailEngineOnCorruption(true);
assertEquals(store.refCount(), refCount + 1);
} catch (EngineCreationFailureException ex) {
// all is fine
Expand Down Expand Up @@ -1502,7 +1498,6 @@ public void testSettings() {


IndexDynamicSettingsModule settings = new IndexDynamicSettingsModule();
assertTrue(settings.containsSetting(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING));
assertTrue(settings.containsSetting(EngineConfig.INDEX_COMPOUND_ON_FLUSH));
assertTrue(settings.containsSetting(EngineConfig.INDEX_GC_DELETES_SETTING));
}
Expand Down
Expand Up @@ -121,7 +121,6 @@ public void setUp() throws Exception {
defaultSettings = ImmutableSettings.builder()
.put(EngineConfig.INDEX_COMPOUND_ON_FLUSH, randomBoolean())
.put(EngineConfig.INDEX_GC_DELETES_SETTING, "1h") // make sure this doesn't kick in on us
.put(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, randomBoolean())
.put(EngineConfig.INDEX_CODEC_SETTING, codecName)
.put(EngineConfig.INDEX_CONCURRENCY_SETTING, indexConcurrency)
.build(); // TODO randomize more settings
Expand Down Expand Up @@ -893,15 +892,13 @@ public void testFailStart() throws IOException {
assertEquals(store.refCount(), refCount);
continue;
}
holder.config().setFailEngineOnCorruption(true);
assertEquals(store.refCount(), refCount+1);
final int numStarts = scaledRandomIntBetween(1, 5);
for (int j = 0; j < numStarts; j++) {
try {
assertEquals(store.refCount(), refCount + 1);
holder.close();
holder = createShadowEngine(store, translog);
holder.config().setFailEngineOnCorruption(true);
assertEquals(store.refCount(), refCount + 1);
} catch (EngineCreationFailureException ex) {
// all is fine
Expand Down
10 changes: 0 additions & 10 deletions src/test/java/org/elasticsearch/index/store/CorruptedFileTest.java
Expand Up @@ -123,19 +123,13 @@ public void testCorruptFileAndRecover() throws ExecutionException, InterruptedEx
}
assertThat(cluster().numDataNodes(), greaterThanOrEqualTo(3));

final boolean failOnCorruption = randomBoolean();
assertAcked(prepareCreate("test").setSettings(ImmutableSettings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1")
.put(MergePolicyModule.MERGE_POLICY_TYPE_KEY, NoMergePolicyProvider.class)
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, false) // no checkindex - we corrupt shards on purpose
.put(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, failOnCorruption)
.put(TranslogService.INDEX_TRANSLOG_DISABLE_FLUSH, true) // no translog based flush - it might change the .liv / segments.N files
.put("indices.recovery.concurrent_streams", 10)
));
if (failOnCorruption == false) { // test the dynamic setting
client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder()
.put(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, true)).get();
}
ensureGreen();
disableAllocation("test");
IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
Expand Down Expand Up @@ -238,7 +232,6 @@ public void testCorruptPrimaryNoReplica() throws ExecutionException, Interrupted
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0")
.put(MergePolicyModule.MERGE_POLICY_TYPE_KEY, NoMergePolicyProvider.class)
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, false) // no checkindex - we corrupt shards on purpose
.put(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, true)
.put(TranslogService.INDEX_TRANSLOG_DISABLE_FLUSH, true) // no translog based flush - it might change the .liv / segments.N files
.put("indices.recovery.concurrent_streams", 10)
));
Expand Down Expand Up @@ -323,7 +316,6 @@ public void testCorruptionOnNetworkLayerFinalizingRecovery() throws ExecutionExc
assertAcked(prepareCreate("test").setSettings(ImmutableSettings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0")
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, true)
.put("index.routing.allocation.include._name", primariesNode.getNode().name())
.put(EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE, EnableAllocationDecider.Rebalance.NONE)

Expand Down Expand Up @@ -387,7 +379,6 @@ public void testCorruptionOnNetworkLayer() throws ExecutionException, Interrupte
assertAcked(prepareCreate("test").setSettings(ImmutableSettings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0")
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, between(1, 4)) // don't go crazy here it must recovery fast
.put(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, true)
// This does corrupt files on the replica, so we can't check:
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, false)
.put("index.routing.allocation.include._name", primariesNode.getNode().name())
Expand Down Expand Up @@ -472,7 +463,6 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0") // no replicas for this test
.put(MergePolicyModule.MERGE_POLICY_TYPE_KEY, NoMergePolicyProvider.class)
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, false) // no checkindex - we corrupt shards on purpose
.put(EngineConfig.INDEX_FAIL_ON_CORRUPTION_SETTING, true)
.put(TranslogService.INDEX_TRANSLOG_DISABLE_FLUSH, true) // no translog based flush - it might change the .liv / segments.N files
.put("indices.recovery.concurrent_streams", 10)
));
Expand Down

0 comments on commit 7861385

Please sign in to comment.