Skip to content

Commit

Permalink
Remove Lucene's deprecated PatternAnalyzer
Browse files Browse the repository at this point in the history
Instead of using the deprecated PatternAnalyzer, the functionality was replicated by using Lucene's PatternTokenizer, LowerCaseFilter and StopFilter

Closes #6717
  • Loading branch information
areek committed Jul 9, 2014
1 parent 541b76c commit 496a6a1
Showing 1 changed file with 32 additions and 2 deletions.
Expand Up @@ -19,8 +19,12 @@

package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.Version;
Expand All @@ -33,15 +37,41 @@
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.io.Reader;
import java.util.regex.Pattern;

/**
*
*/
public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<PatternAnalyzer> {
public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Analyzer> {

private final PatternAnalyzer analyzer;

/**
 * Analyzer that tokenizes input with a regular expression, optionally
 * lowercases the tokens, and then removes stop words.
 */
private static final class PatternAnalyzer extends Analyzer {
    private final org.apache.lucene.util.Version version;
    private final Pattern pattern;
    private final boolean lowercase;
    private final CharArraySet stopWords;

    /**
     * @param version   Lucene version handed to the lowercase and stop filters
     * @param pattern   regular expression handed to the tokenizer
     * @param lowercase whether tokens are lowercased before stop-word removal
     * @param stopWords set of tokens dropped by the stop filter
     */
    PatternAnalyzer(org.apache.lucene.util.Version version, Pattern pattern, boolean lowercase, CharArraySet stopWords) {
        this.version = version;
        this.pattern = pattern;
        this.lowercase = lowercase;
        this.stopWords = stopWords;
    }

    /**
     * Builds the chain: pattern tokenizer, then an optional lowercase filter,
     * then the stop filter (always applied).
     */
    @Override
    protected TokenStreamComponents createComponents(String s, Reader reader) {
        // Group -1: per Lucene's PatternTokenizer docs the pattern then acts as a
        // delimiter (split semantics) rather than selecting a capture group.
        final PatternTokenizer tokenizer = new PatternTokenizer(reader, pattern, -1);

        // Running head of the filter chain; starts at the bare tokenizer.
        TokenStream chain = tokenizer;
        if (lowercase) {
            chain = new LowerCaseFilter(version, chain);
        }
        chain = new StopFilter(version, chain, stopWords);

        return new TokenStreamComponents(tokenizer, chain);
    }
}

@Inject
public PatternAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Expand Down

0 comments on commit 496a6a1

Please sign in to comment.