Skip to content

Commit

Permalink
analyzers: custom analyzers names and aliases must not start with _
Browse files Browse the repository at this point in the history
closes #9596
  • Loading branch information
brwe committed May 26, 2015
1 parent f4a6e43 commit b2c3740
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/reference/analysis/analyzers/custom-analyzer.asciidoc
Expand Up @@ -5,6 +5,7 @@ An analyzer of type `custom` that allows to combine a `Tokenizer` with
zero or more `Token Filters`, and zero or more `Char Filters`. The
custom analyzer accepts a logical/registered name of the tokenizer to
use, and a list of logical/registered names of token filters.
The name of the custom analyzer must not start with "_".

The following are settings that can be set for a `custom` analyzer type:

Expand Down
Expand Up @@ -253,6 +253,11 @@ public AnalysisService(Index index, @IndexSettings Settings indexSettings, @Null
defaultSearchAnalyzer = analyzers.containsKey("default_search") ? analyzers.get("default_search") : analyzers.get("default");
defaultSearchQuoteAnalyzer = analyzers.containsKey("default_search_quote") ? analyzers.get("default_search_quote") : defaultSearchAnalyzer;

for (Map.Entry<String, NamedAnalyzer> analyzer : analyzers.entrySet()) {
if (analyzer.getKey().startsWith("_")) {
throw new IllegalArgumentException("analyzer name must not start with _. got \"" + analyzer.getKey() + "\"");
}
}
this.analyzers = ImmutableMap.copyOf(analyzers);
}

Expand Down
Expand Up @@ -34,6 +34,7 @@
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.inject.ProvisionException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
Expand All @@ -51,6 +52,7 @@
import java.io.*;
import java.util.Set;

import static org.apache.lucene.util.LuceneTestCase.createTempDir;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.hamcrest.Matchers.*;

Expand Down Expand Up @@ -89,7 +91,7 @@ public void testSimpleConfigurationYaml() {
Settings settings = loadFromClasspath("org/elasticsearch/index/analysis/test1.yml");
testSimpleConfiguration(settings);
}

@Test
public void testDefaultFactoryTokenFilters() {
assertTokenFilter("keyword_repeat", KeywordRepeatFilter.class);
Expand Down Expand Up @@ -230,4 +232,36 @@ private File generateWordList(String[] words) throws Exception {
return wordListFile;
}

/**
 * Checks that building the analysis service fails when a custom analyzer is
 * registered under a name that begins with an underscore.
 *
 * The test expects the failure to surface as a {@link ProvisionException}
 * whose cause is an {@link IllegalArgumentException} carrying the offending name.
 */
@Test
public void testUnderscoreInAnalyzerName() {
    final String homeDir = createTempDir().toString();
    final String expectedMessage = "analyzer name must not start with _. got \"_invalid_name\"";
    final Settings invalidSettings = ImmutableSettings.builder()
            .put("index.analysis.analyzer._invalid_name.tokenizer", "keyword")
            .put("path.home", homeDir)
            .put(IndexMetaData.SETTING_VERSION_CREATED, "1")
            .build();
    try {
        getAnalysisService(invalidSettings);
        // Reaching this line means the invalid name was accepted.
        fail("This should fail with IllegalArgumentException because the analyzers name starts with _");
    } catch (ProvisionException provisionFailure) {
        // The validation error is expected as the cause of the provisioning failure.
        final Throwable rootCause = provisionFailure.getCause();
        assertTrue(rootCause instanceof IllegalArgumentException);
        assertThat(rootCause.getMessage(), equalTo(expectedMessage));
    }
}

/**
 * Checks that building the analysis service fails when a validly named custom
 * analyzer declares an alias that begins with an underscore.
 *
 * The test expects the failure to surface as a {@link ProvisionException}
 * whose cause is an {@link IllegalArgumentException} carrying the offending alias.
 */
@Test
public void testUnderscoreInAnalyzerNameAlias() {
    final String homeDir = createTempDir().toString();
    final String expectedMessage = "analyzer name must not start with _. got \"_invalid_name\"";
    final Settings invalidSettings = ImmutableSettings.builder()
            .put("index.analysis.analyzer.valid_name.tokenizer", "keyword")
            .put("index.analysis.analyzer.valid_name.alias", "_invalid_name")
            .put("path.home", homeDir)
            .put(IndexMetaData.SETTING_VERSION_CREATED, "1")
            .build();
    try {
        getAnalysisService(invalidSettings);
        // Reaching this line means the invalid alias was accepted.
        fail("This should fail with IllegalArgumentException because the analyzers alias starts with _");
    } catch (ProvisionException provisionFailure) {
        // The validation error is expected as the cause of the provisioning failure.
        final Throwable rootCause = provisionFailure.getCause();
        assertTrue(rootCause instanceof IllegalArgumentException);
        assertThat(rootCause.getMessage(), equalTo(expectedMessage));
    }
}
}

0 comments on commit b2c3740

Please sign in to comment.