Skip to content

Commit

Permalink
Expose recursion level for Hunspell token filter. Closes #3369
Browse files Browse the repository at this point in the history
  • Loading branch information
lukas-vlcek authored and spinscale committed Jul 25, 2013
1 parent 6101cbf commit 4f0080b
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 6 deletions.
Expand Up @@ -37,6 +37,7 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {

private final HunspellDictionary dictionary;
private final boolean dedup;
private final int recursionLevel;

@Inject
public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
Expand All @@ -53,15 +54,24 @@ public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSett
}

dedup = settings.getAsBoolean("dedup", true);

recursionLevel = settings.getAsInt("recursion_level", 2);
if (recursionLevel < 0) {
throw new ElasticSearchIllegalArgumentException(String.format(Locale.ROOT, "Negative recursion level not allowed for hunspell [%d]", recursionLevel));
}
}

/**
 * Wraps the given token stream in a Hunspell stemming filter.
 *
 * The filter is built from this factory's dictionary, its dedup flag and its
 * configured recursion level, so the "recursion_level" setting actually takes effect.
 *
 * @param tokenStream the stream to be stemmed
 * @return a new {@link HunspellStemFilter} reading from {@code tokenStream}
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    return new HunspellStemFilter(tokenStream, dictionary, dedup, recursionLevel);
}

/**
 * Exposes the dedup flag this factory was configured with.
 *
 * @return the value read from the "dedup" setting ({@code true} when the setting is absent)
 */
public boolean dedup() {
    return this.dedup;
}

/**
 * Exposes the recursion level this factory passes to the Hunspell stem filter.
 *
 * @return the value read from the "recursion_level" setting (2 when the setting is absent);
 *         the constructor rejects negative values, so this is never below zero
 */
public int recursionLevel() {
    return this.recursionLevel;
}

}
Expand Up @@ -75,7 +75,7 @@ public class HunspellService extends AbstractComponent {
private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();

private final LoadingCache<String, HunspellDictionary> dictionaries;
private final Map<String, HunspellDictionary> knownDicitionaries;
private final Map<String, HunspellDictionary> knownDictionaries;

private final boolean defaultIgnoreCase;
private final boolean defaultStrictAffixParsing;
Expand All @@ -86,17 +86,17 @@ public HunspellService(final Settings settings, final Environment env) {
}

@Inject
public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDicitionaries) {
public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDictionaries) {
super(settings);
this.knownDicitionaries = knownDicitionaries;
this.knownDictionaries = knownDictionaries;
this.hunspellDir = resolveHunspellDirectory(settings, env);
this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
final Version version = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
@Override
public HunspellDictionary load(String locale) throws Exception {
HunspellDictionary dictionary = knownDicitionaries.get(locale);
HunspellDictionary dictionary = knownDictionaries.get(locale);
if (dictionary == null) {
dictionary = loadDictionary(locale, settings, env, version);
}
Expand Down Expand Up @@ -146,7 +146,7 @@ private void scanAndLoadDictionaries() {
* @param env The node environment (from which the conf path will be resolved)
* @param version The lucene version
* @return The loaded Hunspell dictionary
* @throws Exception when loading fails (due to IO erros or malformed dictionary files)
* @throws Exception when loading fails (due to IO errors or malformed dictionary files)
*/
private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Version version) throws Exception {
if (logger.isDebugEnabled()) {
Expand Down
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.test.unit.index.analysis;

import org.elasticsearch.common.inject.ProvisionException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
Expand Down Expand Up @@ -63,4 +64,46 @@ public void testDedup() throws IOException {
assertThat(hunspellTokenFilter.dedup(), is(false));
}

@Test
public void testDefaultRecursionLevel() throws IOException {
    // No recursion_level is configured here, so the factory should report its default of 2.
    Settings filterSettings = settingsBuilder()
            .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
            .put("index.analysis.filter.en_US.type", "hunspell")
            .put("index.analysis.filter.en_US.locale", "en_US")
            .build();

    AnalysisService service = AnalysisTestsHelper.createAnalysisServiceFromSettings(filterSettings);
    TokenFilterFactory factory = service.tokenFilter("en_US");
    assertThat(factory, instanceOf(HunspellTokenFilterFactory.class));

    HunspellTokenFilterFactory hunspellFactory = (HunspellTokenFilterFactory) factory;
    assertThat(hunspellFactory.recursionLevel(), is(2));
}

@Test
public void testCustomRecursionLevel() throws IOException {
    // An explicit recursion_level of 0 must be accepted and reported back unchanged.
    Settings filterSettings = settingsBuilder()
            .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
            .put("index.analysis.filter.en_US.type", "hunspell")
            .put("index.analysis.filter.en_US.recursion_level", 0)
            .put("index.analysis.filter.en_US.locale", "en_US")
            .build();

    AnalysisService service = AnalysisTestsHelper.createAnalysisServiceFromSettings(filterSettings);
    TokenFilterFactory factory = service.tokenFilter("en_US");
    assertThat(factory, instanceOf(HunspellTokenFilterFactory.class));

    HunspellTokenFilterFactory hunspellFactory = (HunspellTokenFilterFactory) factory;
    assertThat(hunspellFactory.recursionLevel(), is(0));
}

@Test(expected = ProvisionException.class)
public void negativeRecursionLevelShouldFail() throws IOException {
    // A recursion_level below zero is invalid; building the analysis service with it
    // is expected to fail with a ProvisionException.
    Settings invalidSettings = settingsBuilder()
            .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
            .put("index.analysis.filter.en_US.type", "hunspell")
            .put("index.analysis.filter.en_US.recursion_level", -1)
            .put("index.analysis.filter.en_US.locale", "en_US")
            .build();

    AnalysisTestsHelper.createAnalysisServiceFromSettings(invalidSettings);
}

}

0 comments on commit 4f0080b

Please sign in to comment.