diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index 72222d5670235..3553c2de03fa0 100644 --- a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java +++ b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -93,18 +93,41 @@ public void reset(QueryParserSettings settings) { } else { this.field = null; } - setAnalyzer(settings.analyzer()); - setMultiTermRewriteMethod(settings.rewriteMethod()); + if (settings.analyzer() != null) { + setAnalyzer(settings.analyzer()); + } else { + setAnalyzer(context.getMapperService().searchAnalyzer()); + } + if (settings.rewriteMethod() != null) { + setMultiTermRewriteMethod(settings.rewriteMethod()); + } setEnablePositionIncrements(settings.enablePositionIncrements()); setAutoGeneratePhraseQueries(settings.autoGeneratePhraseQueries()); setMaxDeterminizedStates(settings.maxDeterminizedStates()); setAllowLeadingWildcard(settings.allowLeadingWildcard()); - setLowercaseExpandedTerms(settings.lowercaseExpandedTerms()); + setLowercaseExpandedTerms(false); // no need for it, we use the mappings to figure it out setPhraseSlop(settings.phraseSlop()); setDefaultOperator(settings.defaultOperator()); setFuzzyMinSim(settings.fuzziness().asFloat()); setFuzzyPrefixLength(settings.fuzzyPrefixLength()); - setLocale(settings.locale()); + } + + private void setMultiTermAnalyzer() { + if (settings.multiTermAnalyzer() != null) { + setAnalyzer(settings.multiTermAnalyzer()); + } else { + setAnalyzer(context.getMapperService().searchMultiTermAnalyzer()); + } + } + + private void setQuoteAnalyzer() { + if (settings.quoteAnalyzer() != null) { + setAnalyzer(settings.quoteAnalyzer()); + } else if (settings.analyzer() != null) { + setAnalyzer(settings.analyzer()); + } else { + setAnalyzer(context.getMapperService().searchQuoteAnalyzer()); + } } /** @@ -197,31 +220,21 @@ private 
Query getFieldQuerySingle(String field, String queryText, boolean quoted currentFieldType = null; Analyzer oldAnalyzer = getAnalyzer(); try { - if (quoted) { - setAnalyzer(settings.quoteAnalyzer()); - if (settings.quoteFieldSuffix() != null) { - currentFieldType = context.fieldMapper(field + settings.quoteFieldSuffix()); - } + if (quoted && settings.quoteFieldSuffix() != null) { + currentFieldType = context.fieldMapper(field + settings.quoteFieldSuffix()); } if (currentFieldType == null) { currentFieldType = context.fieldMapper(field); } if (currentFieldType != null) { if (quoted) { - if (!settings.forceQuoteAnalyzer()) { - setAnalyzer(context.getSearchQuoteAnalyzer(currentFieldType)); - } - } else { - if (!settings.forceAnalyzer()) { - setAnalyzer(context.getSearchAnalyzer(currentFieldType)); - } + setQuoteAnalyzer(); } if (currentFieldType != null) { - Query query = null; if (currentFieldType.tokenized() == false) { // this might be a structured field like a numeric try { - query = currentFieldType.termQuery(queryText, context); + return currentFieldType.termQuery(queryText, context); } catch (RuntimeException e) { if (settings.lenient()) { return null; @@ -230,10 +243,6 @@ private Query getFieldQuerySingle(String field, String queryText, boolean quoted } } } - if (query == null) { - query = super.getFieldQuery(currentFieldType.name(), queryText, quoted); - } - return query; } } return super.getFieldQuery(field, queryText, quoted); @@ -328,39 +337,61 @@ protected Query getRangeQuery(String field, String part1, String part2, private Query getRangeQuerySingle(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { - currentFieldType = context.fieldMapper(field); - if (currentFieldType != null) { - if (lowercaseExpandedTerms && currentFieldType.tokenized()) { - part1 = part1 == null ? null : part1.toLowerCase(locale); - part2 = part2 == null ? 
null : part2.toLowerCase(locale); - } + Analyzer oldAnalyzer = getAnalyzer(); + try { + setMultiTermAnalyzer(); + currentFieldType = context.fieldMapper(field); + if (currentFieldType != null) { + try { + if (currentFieldType.tokenized()) { + part1 = part1 == null ? null : analyzeSingleToken(field, part1); + part2 = part2 == null ? null : analyzeSingleToken(field, part2); + } - try { - Query rangeQuery; - if (currentFieldType instanceof LegacyDateFieldMapper.DateFieldType && settings.timeZone() != null) { - LegacyDateFieldMapper.DateFieldType dateFieldType = (LegacyDateFieldMapper.DateFieldType) this.currentFieldType; - rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null); - } else if (currentFieldType instanceof DateFieldMapper.DateFieldType && settings.timeZone() != null) { - DateFieldMapper.DateFieldType dateFieldType = (DateFieldMapper.DateFieldType) this.currentFieldType; - rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null); - } else { - rangeQuery = currentFieldType.rangeQuery(part1, part2, startInclusive, endInclusive); - } - return rangeQuery; - } catch (RuntimeException e) { - if (settings.lenient()) { - return null; + Query rangeQuery; + if (currentFieldType instanceof LegacyDateFieldMapper.DateFieldType && settings.timeZone() != null) { + LegacyDateFieldMapper.DateFieldType dateFieldType = (LegacyDateFieldMapper.DateFieldType) this.currentFieldType; + rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null); + } else if (currentFieldType instanceof DateFieldMapper.DateFieldType && settings.timeZone() != null) { + DateFieldMapper.DateFieldType dateFieldType = (DateFieldMapper.DateFieldType) this.currentFieldType; + rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null); + } else { + rangeQuery = currentFieldType.rangeQuery(part1, part2, 
startInclusive, endInclusive); + } + return rangeQuery; + } catch (RuntimeException e) { + if (settings.lenient()) { + return null; + } + throw e; } - throw e; } + return newRangeQuery(field, part1, part2, startInclusive, endInclusive); + } finally { + setAnalyzer(oldAnalyzer); } - return newRangeQuery(field, part1, part2, startInclusive, endInclusive); } - protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(locale); + private String analyzeSingleToken(String field, String value) { + try (TokenStream tk = getAnalyzer().tokenStream(field, value)) { + CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class); + tk.reset(); + final String term; + if (tk.incrementToken()) { + term = termAtt.toString(); + } else { + throw new IllegalStateException("Expected 1 token but got 0"); + } + if (tk.incrementToken()) { + throw new IllegalStateException("Expected 1 token but got 2 or more"); + } + return term; + } catch (IOException e) { + throw new IllegalStateException("Cannot happen", e); } + } + + protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException { Collection fields = extractMultiFields(field); if (fields != null) { if (fields.size() == 1) { @@ -396,19 +427,26 @@ protected Query getFuzzyQuery(String field, String termStr, String minSimilarity } private Query getFuzzyQuerySingle(String field, String termStr, String minSimilarity) throws ParseException { - currentFieldType = context.fieldMapper(field); - if (currentFieldType != null) { - try { - return currentFieldType.fuzzyQuery(termStr, Fuzziness.build(minSimilarity), - fuzzyPrefixLength, settings.fuzzyMaxExpansions(), FuzzyQuery.defaultTranspositions); - } catch (RuntimeException e) { - if (settings.lenient()) { - return null; + Analyzer oldAnalyzer = getAnalyzer(); + try { + setMultiTermAnalyzer(); + currentFieldType = 
context.fieldMapper(field); + if (currentFieldType != null) { + try { + termStr = termStr == null ? null : analyzeSingleToken(field, termStr); + return currentFieldType.fuzzyQuery(termStr, Fuzziness.build(minSimilarity), + fuzzyPrefixLength, settings.fuzzyMaxExpansions(), FuzzyQuery.defaultTranspositions); + } catch (RuntimeException e) { + if (settings.lenient()) { + return null; + } + throw e; } - throw e; } + return super.getFuzzyQuery(field, termStr, Float.parseFloat(minSimilarity)); + } finally { + setAnalyzer(oldAnalyzer); } - return super.getFuzzyQuery(field, termStr, Float.parseFloat(minSimilarity)); } @Override @@ -423,9 +461,6 @@ protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLeng @Override protected Query getPrefixQuery(String field, String termStr) throws ParseException { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(locale); - } Collection fields = extractMultiFields(field); if (fields != null) { if (fields.size() == 1) { @@ -467,9 +502,6 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseExc try { currentFieldType = context.fieldMapper(field); if (currentFieldType != null) { - if (!settings.forceAnalyzer()) { - setAnalyzer(context.getSearchAnalyzer(currentFieldType)); - } Query query = null; if (currentFieldType.tokenized() == false) { query = currentFieldType.prefixQuery(termStr, multiTermRewriteMethod, context); @@ -491,8 +523,8 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseExc } private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException { - if (!settings.analyzeWildcard()) { - return super.getPrefixQuery(field, termStr); + if (settings.analyzeWildcard() == false) { + setMultiTermAnalyzer(); } List > tlist; // get Analyzer from superclass and tokenize the term @@ -590,9 +622,6 @@ protected Query getWildcardQuery(String field, String termStr) throws ParseExcep return 
FIELD_QUERY_EXTENSIONS.get(ExistsFieldQueryExtension.NAME).query(context, actualField); } } - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(locale); - } Collection fields = extractMultiFields(field); if (fields != null) { if (fields.size() == 1) { @@ -629,19 +658,10 @@ protected Query getWildcardQuery(String field, String termStr) throws ParseExcep } private Query getWildcardQuerySingle(String field, String termStr) throws ParseException { - String indexedNameField = field; currentFieldType = null; Analyzer oldAnalyzer = getAnalyzer(); try { - currentFieldType = context.fieldMapper(field); - if (currentFieldType != null) { - if (!settings.forceAnalyzer()) { - setAnalyzer(context.getSearchAnalyzer(currentFieldType)); - } - indexedNameField = currentFieldType.name(); - return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr); - } - return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr); + return getPossiblyAnalyzedWildcardQuery(field, termStr); } catch (RuntimeException e) { if (settings.lenient()) { return null; @@ -653,8 +673,8 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE } private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException { - if (!settings.analyzeWildcard()) { - return super.getWildcardQuery(field, termStr); + if (settings.analyzeWildcard() == false) { + setMultiTermAnalyzer(); } boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*")); StringBuilder aggStr = new StringBuilder(); @@ -718,9 +738,6 @@ private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) thr @Override protected Query getRegexpQuery(String field, String termStr) throws ParseException { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(locale); - } Collection fields = extractMultiFields(field); if (fields != null) { if (fields.size() == 1) { @@ -760,11 +777,9 @@ private Query getRegexpQuerySingle(String field, String 
termStr) throws ParseExc currentFieldType = null; Analyzer oldAnalyzer = getAnalyzer(); try { + setMultiTermAnalyzer(); currentFieldType = context.fieldMapper(field); if (currentFieldType != null) { - if (!settings.forceAnalyzer()) { - setAnalyzer(context.getSearchAnalyzer(currentFieldType)); - } Query query = null; if (currentFieldType.tokenized() == false) { query = currentFieldType.regexpQuery(termStr, RegExp.ALL, diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java b/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java index c1fc2ae556ea7..ded525370e4d0 100644 --- a/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java +++ b/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java @@ -20,11 +20,13 @@ package org.apache.lucene.queryparser.classic; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.util.automaton.Operations; import org.elasticsearch.common.unit.Fuzziness; import org.joda.time.DateTimeZone; -import java.util.Locale; +import java.util.Collections; import java.util.Map; /** @@ -32,18 +34,15 @@ */ public class QueryParserSettings { - private final String queryString; - private String defaultField; - private Map fieldsAndWeights; + private Map fieldsAndWeights = Collections.emptyMap(); private QueryParser.Operator defaultOperator; private Analyzer analyzer; - private boolean forceAnalyzer; + private Analyzer multiTermAnalyzer; private Analyzer quoteAnalyzer; - private boolean forceQuoteAnalyzer; private String quoteFieldSuffix; @@ -53,15 +52,11 @@ public class QueryParserSettings { private boolean analyzeWildcard; - private boolean lowercaseExpandedTerms; - private boolean enablePositionIncrements; - private Locale locale; - - private Fuzziness fuzziness; - private int fuzzyPrefixLength; - private int 
fuzzyMaxExpansions; + private Fuzziness fuzziness = Fuzziness.AUTO; + private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + private int fuzzyMaxExpansions = FuzzyQuery.defaultMaxExpansions; private MultiTermQuery.RewriteMethod fuzzyRewriteMethod; private int phraseSlop; @@ -77,15 +72,7 @@ public class QueryParserSettings { private DateTimeZone timeZone; /** To limit effort spent determinizing regexp queries. */ - private int maxDeterminizedStates; - - public QueryParserSettings(String queryString) { - this.queryString = queryString; - } - - public String queryString() { - return queryString; - } + private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES; public String defaultField() { return defaultField; @@ -135,14 +122,6 @@ public void allowLeadingWildcard(boolean allowLeadingWildcard) { this.allowLeadingWildcard = allowLeadingWildcard; } - public boolean lowercaseExpandedTerms() { - return lowercaseExpandedTerms; - } - - public void lowercaseExpandedTerms(boolean lowercaseExpandedTerms) { - this.lowercaseExpandedTerms = lowercaseExpandedTerms; - } - public boolean enablePositionIncrements() { return enablePositionIncrements; } @@ -183,42 +162,27 @@ public void fuzzyRewriteMethod(MultiTermQuery.RewriteMethod fuzzyRewriteMethod) this.fuzzyRewriteMethod = fuzzyRewriteMethod; } - public void defaultAnalyzer(Analyzer analyzer) { + public void analyzer(Analyzer analyzer, Analyzer multiTermAnalyzer) { this.analyzer = analyzer; - this.forceAnalyzer = false; - } - - public void forceAnalyzer(Analyzer analyzer) { - this.analyzer = analyzer; - this.forceAnalyzer = true; + this.multiTermAnalyzer = multiTermAnalyzer; } public Analyzer analyzer() { return analyzer; } - public boolean forceAnalyzer() { - return forceAnalyzer; - } - - public void defaultQuoteAnalyzer(Analyzer quoteAnalyzer) { - this.quoteAnalyzer = quoteAnalyzer; - this.forceQuoteAnalyzer = false; + public Analyzer multiTermAnalyzer() { + return multiTermAnalyzer; } - public 
void forceQuoteAnalyzer(Analyzer quoteAnalyzer) { + public void quoteAnalyzer(Analyzer quoteAnalyzer) { this.quoteAnalyzer = quoteAnalyzer; - this.forceQuoteAnalyzer = true; } public Analyzer quoteAnalyzer() { return quoteAnalyzer; } - public boolean forceQuoteAnalyzer() { - return forceQuoteAnalyzer; - } - public boolean analyzeWildcard() { return this.analyzeWildcard; } @@ -267,14 +231,6 @@ public void useDisMax(boolean useDisMax) { this.useDisMax = useDisMax; } - public void locale(Locale locale) { - this.locale = locale; - } - - public Locale locale() { - return this.locale; - } - public void timeZone(DateTimeZone timeZone) { this.timeZone = timeZone; } diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java index 4536670872d5b..8ea94c29c285e 100644 --- a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java @@ -32,6 +32,7 @@ import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.indices.mapper.MapperRegistry; +import java.io.IOException; import java.util.Collections; import static org.elasticsearch.common.util.set.Sets.newHashSet; @@ -167,9 +168,12 @@ public NamedAnalyzer analyzer(String name) { } @Override - public void close() { - fakeAnalyzer.close(); - super.close(); + public void close() throws IOException { + try { + fakeAnalyzer.close(); + } finally { + super.close(); + } } } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AbstractIndexAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/AbstractIndexAnalyzerProvider.java index c0406cb806eae..c61eab5a73934 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AbstractIndexAnalyzerProvider.java +++ 
b/core/src/main/java/org/elasticsearch/index/analysis/AbstractIndexAnalyzerProvider.java @@ -20,6 +20,11 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.util.Version; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.AbstractIndexComponent; @@ -28,7 +33,7 @@ /** * */ -public abstract class AbstractIndexAnalyzerProvider extends AbstractIndexComponent implements AnalyzerProvider { +public abstract class AbstractIndexAnalyzerProvider extends AbstractIndexComponent implements AnalyzerProvider { private final String name; @@ -58,4 +63,26 @@ public final String name() { public final AnalyzerScope scope() { return AnalyzerScope.INDEX; } + + private Analyzer multiTermAnalyzer = null; + + @Override + public synchronized Analyzer getMultiTerm() { + if (multiTermAnalyzer == null) { + // default impl that should work at least for most european + // languages. Eg. 
this should work with the standard and english + // analyzers + multiTermAnalyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + final Tokenizer source = new KeywordTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + return new TokenStreamComponents(source, result); + } + }; + } + return multiTermAnalyzer; + } + } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 2d73df76f07e1..2cb49c3f6e04c 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -23,7 +23,6 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; @@ -396,7 +395,9 @@ private PrebuiltAnalysis() { // Analyzers for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) { String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT); - analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT))); + analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, + preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT), + preBuiltAnalyzerEnum.getMultiTermAnalyzer(Version.CURRENT))); } // Tokenizers diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java index b9146df8c96e1..0edd17535926a 100644 --- 
a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.util.IOUtils; import org.elasticsearch.Version; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; @@ -28,8 +29,12 @@ import org.elasticsearch.index.mapper.core.TextFieldMapper; import java.io.Closeable; +import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.util.Collections.unmodifiableMap; @@ -38,17 +43,34 @@ */ public class AnalysisService extends AbstractIndexComponent implements Closeable { + private static NamedAnalyzer buildNamedAnalyzer(String name, AnalyzerProvider analyzerFactory, + Analyzer analyzerF, int overridePositionIncrementGap) { + if (analyzerF instanceof NamedAnalyzer) { + // if we got a named analyzer back, use it... 
+ NamedAnalyzer analyzer = (NamedAnalyzer) analyzerF; + if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) { + // unless the positionIncrementGap needs to be overridden + analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap); + } + return analyzer; + } else { + return new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap); + } + } + private final Map analyzers; + private final Map multiTermAnalyzers; private final Map tokenizers; private final Map charFilters; private final Map tokenFilters; private final NamedAnalyzer defaultIndexAnalyzer; private final NamedAnalyzer defaultSearchAnalyzer; + private final NamedAnalyzer defaultSearchMultiTermAnalyzer; private final NamedAnalyzer defaultSearchQuoteAnalyzer; public AnalysisService(IndexSettings indexSettings, - Map analyzerProviders, + Map analyzerFactories, Map tokenizerFactoryFactories, Map charFilterFactoryFactories, Map tokenFilterFactoryFactories) { @@ -56,20 +78,21 @@ public AnalysisService(IndexSettings indexSettings, this.tokenizers = unmodifiableMap(tokenizerFactoryFactories); this.charFilters = unmodifiableMap(charFilterFactoryFactories); this.tokenFilters = unmodifiableMap(tokenFilterFactoryFactories); - analyzerProviders = new HashMap<>(analyzerProviders); + analyzerFactories = new HashMap<>(analyzerFactories); - if (!analyzerProviders.containsKey("default")) { - analyzerProviders.put("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS)); + if (!analyzerFactories.containsKey("default")) { + analyzerFactories.put("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS)); } - if (!analyzerProviders.containsKey("default_search")) { - analyzerProviders.put("default_search", analyzerProviders.get("default")); + if (!analyzerFactories.containsKey("default_search")) { + 
analyzerFactories.put("default_search", analyzerFactories.get("default")); } - if (!analyzerProviders.containsKey("default_search_quoted")) { - analyzerProviders.put("default_search_quoted", analyzerProviders.get("default_search")); + if (!analyzerFactories.containsKey("default_search_quoted")) { + analyzerFactories.put("default_search_quoted", analyzerFactories.get("default_search")); } Map analyzers = new HashMap<>(); - for (Map.Entry entry : analyzerProviders.entrySet()) { + Map multiTermAnalyzers = new HashMap<>(); + for (Map.Entry entry : analyzerFactories.entrySet()) { AnalyzerProvider analyzerFactory = entry.getValue(); String name = entry.getKey(); /* @@ -94,30 +117,25 @@ public AnalysisService(IndexSettings indexSettings, if (analyzerF == null) { throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer"); } - NamedAnalyzer analyzer; - if (analyzerF instanceof NamedAnalyzer) { - // if we got a named analyzer back, use it... - analyzer = (NamedAnalyzer) analyzerF; - if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) { - // unless the positionIncrementGap needs to be overridden - analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap); - } - } else { - analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap); - } + NamedAnalyzer analyzer = buildNamedAnalyzer(name, analyzerFactory, analyzerF, overridePositionIncrementGap); + Analyzer multiTermAnalyzerF = analyzerFactory.getMultiTerm(); + NamedAnalyzer multiTermAnalyzer = buildNamedAnalyzer(name, analyzerFactory, multiTermAnalyzerF, overridePositionIncrementGap); if (analyzers.containsKey(name)) { throw new IllegalStateException("already registered analyzer with name: " + name); } analyzers.put(name, analyzer); + multiTermAnalyzers.put(name, multiTermAnalyzer); String strAliases = 
this.indexSettings.getSettings().get("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); if (strAliases != null) { for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) { analyzers.put(alias, analyzer); + multiTermAnalyzers.put(alias, multiTermAnalyzer); } } String[] aliases = this.indexSettings.getSettings().getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); for (String alias : aliases) { analyzers.put(alias, analyzer); + multiTermAnalyzers.put(alias, multiTermAnalyzer); } } @@ -133,38 +151,54 @@ public AnalysisService(IndexSettings indexSettings, deprecationLogger.deprecated("setting [index.analysis.analyzer.default_index] is deprecated, use [index.analysis.analyzer.default] instead for index [{}]", index().getName()); } } - defaultIndexAnalyzer = analyzers.containsKey("default_index") ? analyzers.get("default_index") : defaultAnalyzer; - defaultSearchAnalyzer = analyzers.containsKey("default_search") ? analyzers.get("default_search") : defaultAnalyzer; - defaultSearchQuoteAnalyzer = analyzers.containsKey("default_search_quote") ? 
analyzers.get("default_search_quote") : defaultSearchAnalyzer; - + NamedAnalyzer defaultIndexAnalyzer = analyzers.get("default_index"); + if (defaultIndexAnalyzer == null) { + defaultIndexAnalyzer = defaultAnalyzer; + } + NamedAnalyzer defaultSearchAnalyzer = analyzers.get("default_search"); + if (defaultSearchAnalyzer == null) { + defaultSearchAnalyzer = defaultAnalyzer; + } + NamedAnalyzer defaultSearchQuoteAnalyzer = analyzers.get("default_search_quote"); + if (defaultSearchQuoteAnalyzer == null) { + defaultSearchQuoteAnalyzer = defaultSearchAnalyzer; + } + NamedAnalyzer defaultSearchMultiTermAnalyzer = multiTermAnalyzers.get("default_search"); + if (defaultSearchMultiTermAnalyzer == null) { + defaultSearchMultiTermAnalyzer = multiTermAnalyzers.get("default"); + } + this.defaultIndexAnalyzer = defaultIndexAnalyzer; + this.defaultSearchAnalyzer = defaultSearchAnalyzer; + this.defaultSearchQuoteAnalyzer = defaultSearchQuoteAnalyzer; + this.defaultSearchMultiTermAnalyzer = defaultSearchMultiTermAnalyzer; for (Map.Entry analyzer : analyzers.entrySet()) { if (analyzer.getKey().startsWith("_")) { throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + analyzer.getKey() + "\""); } } + assert analyzers.keySet().equals(multiTermAnalyzers.keySet()); this.analyzers = unmodifiableMap(analyzers); + this.multiTermAnalyzers = unmodifiableMap(multiTermAnalyzers); } @Override - public void close() { - for (NamedAnalyzer analyzer : analyzers.values()) { - if (analyzer.scope() == AnalyzerScope.INDEX) { - try { - analyzer.close(); - } catch (NullPointerException e) { - // because analyzers are aliased, they might be closed several times - // an NPE is thrown in this case, so ignore.... 
- } catch (Exception e) { - logger.debug("failed to close analyzer {}", analyzer); - } - } - } + public void close() throws IOException { + List indexAnalyzers = Stream.concat( + analyzers.values().stream(), + multiTermAnalyzers.values().stream()) + .filter(analyzer -> analyzer.scope() == AnalyzerScope.INDEX) + .collect(Collectors.toList()); + IOUtils.close(indexAnalyzers); } public NamedAnalyzer analyzer(String name) { return analyzers.get(name); } + public NamedAnalyzer multiTermAnalyzer(String name) { + return multiTermAnalyzers.get(name); + } + public NamedAnalyzer defaultIndexAnalyzer() { return defaultIndexAnalyzer; } @@ -173,6 +207,10 @@ public NamedAnalyzer defaultSearchAnalyzer() { return defaultSearchAnalyzer; } + public NamedAnalyzer defaultSearchMultiTermAnalyzer() { + return defaultSearchMultiTermAnalyzer; + } + public NamedAnalyzer defaultSearchQuoteAnalyzer() { return defaultSearchQuoteAnalyzer; } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalyzerProvider.java index ef49d5c8dac9b..4d14ee629b4dd 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalyzerProvider.java @@ -25,12 +25,12 @@ /** * */ -public interface AnalyzerProvider extends Provider { +public interface AnalyzerProvider extends Provider { String name(); AnalyzerScope scope(); - @Override - T get(); + /** Get the analyzer that should be used for multi-term queries. 
*/ + Analyzer getMultiTerm(); } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java index 5a1754a02fec1..68b07e1c4f640 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final ArabicAnalyzer arabicAnalyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java index fabb8b0738866..7f24f06d7d57d 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final ArmenianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java index 1ceffd43c81aa..c4bc4ef519a56 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BasqueAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java 
b/core/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java index 9e33dce3a9d44..a3cdb2430fae4 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BrazilianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java index e93233169b9e9..5d1c9af7e2681 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BulgarianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java index 04c068a4371c4..608e11bfb349e 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final CatalanAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java index 10e6f0dc42f1e..9fe01184c1a07 100644 
--- a/core/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java @@ -27,7 +27,7 @@ /** * Only for old indexes */ -public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final StandardAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/CjkAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/CjkAnalyzerProvider.java index 57796c1651533..84e4452f4456d 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/CjkAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/CjkAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final CJKAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java index a8bfd996a3e4f..9687889f8f4c6 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java @@ -19,6 +19,9 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; import org.elasticsearch.Version; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; @@ -31,11 +34,12 @@ * A custom analyzer that is built out of a single {@link org.apache.lucene.analysis.Tokenizer} and a list * of {@link org.apache.lucene.analysis.TokenFilter}s. 
*/ -public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final Settings analyzerSettings; private CustomAnalyzer customAnalyzer; + private CustomAnalyzer customMultiTermAnalyzer; public CustomAnalyzerProvider(IndexSettings indexSettings, String name, Settings settings) { @@ -98,10 +102,41 @@ public void build(AnalysisService analysisService) { positionIncrementGap, offsetGap ); + + TokenizerFactory multiTermTokenizer = tokenizer; + if (multiTermTokenizer instanceof MultiTermAwareComponent) { + multiTermTokenizer = (TokenizerFactory) ((MultiTermAwareComponent) multiTermTokenizer).getMultiTermComponent(); + } else { + multiTermTokenizer = new TokenizerFactory() { + @Override + public String name() { + return "keyword"; + } + @Override + public Tokenizer create() { + return new KeywordTokenizer(); + } + }; + } + CharFilterFactory[] multiTermCharFilters = charFilters.stream() + .filter(charFilterFactory -> charFilterFactory instanceof MultiTermAwareComponent) + .map(charFilterFactory -> (CharFilterFactory) ((MultiTermAwareComponent) charFilterFactory).getMultiTermComponent()) + .toArray(size -> new CharFilterFactory[size]); + TokenFilterFactory[] multiTermTokenFilters = tokenFilters.stream() + .filter(tokenFilterFactory -> tokenFilterFactory instanceof MultiTermAwareComponent) + .map(tokenFilterFactory -> (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilterFactory).getMultiTermComponent()) + .toArray(size -> new TokenFilterFactory[size]); + this.customMultiTermAnalyzer = new CustomAnalyzer(multiTermTokenizer, multiTermCharFilters, multiTermTokenFilters, + positionIncrementGap, offsetGap); } @Override public CustomAnalyzer get() { return this.customAnalyzer; } + + @Override + public Analyzer getMultiTerm() { + return customMultiTermAnalyzer; + } } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java 
b/core/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java index 6480b13965374..3be2996cac4a5 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final CzechAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java index adf1290d25537..80756b7b1fd7d 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final DanishAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java index cff7a6f2087fa..ad4c09dc33ebe 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final DutchAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java index 271934bbd79b0..aca4260c6e2a5 100644 --- 
a/core/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final EnglishAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/FingerprintAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/FingerprintAnalyzerProvider.java index bb8a51e096918..fc69ec448b196 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/FingerprintAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/FingerprintAnalyzerProvider.java @@ -31,7 +31,7 @@ * Builds an OpenRefine Fingerprint analyzer. Uses the default settings from the various components * (Standard Tokenizer and lowercase + stop + fingerprint + ascii-folding filters) */ -public class FingerprintAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class FingerprintAnalyzerProvider extends AbstractIndexAnalyzerProvider { public static ParseField MAX_OUTPUT_SIZE = FingerprintTokenFilterFactory.MAX_OUTPUT_SIZE; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java index 676da5f0c0cb2..ababe56d6f7bc 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final FinnishAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java 
b/core/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java index 51314633d25c2..f135087c08902 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final FrenchAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java index 0be7f76bf054f..8156d3b646133 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final GalicianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java index 98e5adf852859..95b1fcb4e3be2 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final GermanAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/GreekAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/GreekAnalyzerProvider.java index 4550af52cec03..bd394bca2184a 100644 --- 
a/core/src/main/java/org/elasticsearch/index/analysis/GreekAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/GreekAnalyzerProvider.java @@ -27,7 +27,7 @@ /** * */ -public class GreekAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class GreekAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final GreekAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java index 4e628e5e301f8..49d0c92d66668 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final HindiAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/HungarianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/HungarianAnalyzerProvider.java index 751ef0094f6ee..5e44742dc85de 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/HungarianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/HungarianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class HungarianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class HungarianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final HungarianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/IndonesianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/IndonesianAnalyzerProvider.java index f15cc74a9a939..ca6eab556247b 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/IndonesianAnalyzerProvider.java +++ 
b/core/src/main/java/org/elasticsearch/index/analysis/IndonesianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class IndonesianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class IndonesianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final IndonesianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/IrishAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/IrishAnalyzerProvider.java index 813db1d36be51..5b00e2ab88abd 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/IrishAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/IrishAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * Provider for {@link IrishAnalyzer} */ -public class IrishAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class IrishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final IrishAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/ItalianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/ItalianAnalyzerProvider.java index 9457b45e9f2f5..cef9c0866cbde 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/ItalianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/ItalianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class ItalianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class ItalianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final ItalianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/KeywordAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/KeywordAnalyzerProvider.java index 0bf134cb38040..3bc6ea41cf826 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/KeywordAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/KeywordAnalyzerProvider.java @@ -19,6 +19,7 @@ package 
org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; @@ -27,7 +28,7 @@ /** * */ -public class KeywordAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class KeywordAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final KeywordAnalyzer keywordAnalyzer; @@ -40,4 +41,9 @@ public KeywordAnalyzerProvider(IndexSettings indexSettings, Environment environm public KeywordAnalyzer get() { return this.keywordAnalyzer; } + + @Override + public Analyzer getMultiTerm() { + return get(); + } } \ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/index/analysis/LatvianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/LatvianAnalyzerProvider.java index 01865c17d1436..7bf05dbc5ffbf 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/LatvianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/LatvianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class LatvianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class LatvianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final LatvianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/LithuanianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/LithuanianAnalyzerProvider.java index 9b24eaa16a12d..0f5a0c73d5ec4 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/LithuanianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/LithuanianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * Provider for {@link LithuanianAnalyzer} */ -public class LithuanianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class LithuanianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final 
LithuanianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/NorwegianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/NorwegianAnalyzerProvider.java index 1b136bfcef90c..915dc317b2505 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/NorwegianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/NorwegianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class NorwegianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class NorwegianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final NorwegianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PatternAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/PatternAnalyzerProvider.java index f00988f4ad2b2..5beae9598b9b0 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PatternAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/PatternAnalyzerProvider.java @@ -33,7 +33,7 @@ /** * */ -public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final PatternAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PersianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/PersianAnalyzerProvider.java index 66bebe579ac5d..037f91a422c24 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PersianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/PersianAnalyzerProvider.java @@ -27,7 +27,7 @@ /** * */ -public class PersianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class PersianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final PersianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java 
b/core/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java index 11cbaac4ad021..c5a7085517081 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final PortugueseAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerProvider.java index af87d090de482..72a0cab980151 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerProvider.java @@ -24,15 +24,17 @@ /** * */ -public class PreBuiltAnalyzerProvider implements AnalyzerProvider { +public class PreBuiltAnalyzerProvider implements AnalyzerProvider { private final NamedAnalyzer analyzer; + private final NamedAnalyzer multiTermAnalyzer; - public PreBuiltAnalyzerProvider(String name, AnalyzerScope scope, Analyzer analyzer) { + public PreBuiltAnalyzerProvider(String name, AnalyzerScope scope, Analyzer analyzer, Analyzer multiTermAnalyzer) { // we create the named analyzer here so the resources associated with it will be shared // and we won't wrap a shared analyzer with named analyzer each time causing the resources // to not be shared... 
this.analyzer = new NamedAnalyzer(name, scope, analyzer); + this.multiTermAnalyzer = new NamedAnalyzer(name, scope, multiTermAnalyzer); } @Override @@ -49,4 +51,9 @@ public AnalyzerScope scope() { public NamedAnalyzer get() { return analyzer; } -} + + @Override + public Analyzer getMultiTerm() { + return multiTermAnalyzer; + } +} \ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerProviderFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerProviderFactory.java index 00724dccfa19d..47bc638d75afd 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerProviderFactory.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerProviderFactory.java @@ -36,8 +36,8 @@ public class PreBuiltAnalyzerProviderFactory implements AnalysisModule.AnalysisP private final PreBuiltAnalyzerProvider analyzerProvider; - public PreBuiltAnalyzerProviderFactory(String name, AnalyzerScope scope, Analyzer analyzer) { - analyzerProvider = new PreBuiltAnalyzerProvider(name, scope, analyzer); + public PreBuiltAnalyzerProviderFactory(String name, AnalyzerScope scope, Analyzer analyzer, Analyzer multiTermAnalyzer) { + analyzerProvider = new PreBuiltAnalyzerProvider(name, scope, analyzer, multiTermAnalyzer); } public AnalyzerProvider create(String name, Settings settings) { @@ -46,7 +46,8 @@ public AnalyzerProvider create(String name, Settings settings) { PreBuiltAnalyzers preBuiltAnalyzers = PreBuiltAnalyzers.getOrDefault(name, null); if (preBuiltAnalyzers != null) { Analyzer analyzer = preBuiltAnalyzers.getAnalyzer(indexVersion); - return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer); + Analyzer multiTermAnalyzer = preBuiltAnalyzers.getMultiTermAnalyzer(indexVersion); + return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer, multiTermAnalyzer); } } diff --git 
a/core/src/main/java/org/elasticsearch/index/analysis/RomanianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/RomanianAnalyzerProvider.java index a455cef3ad129..6a7a8bdd79209 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/RomanianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/RomanianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final RomanianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/RussianAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/RussianAnalyzerProvider.java index fca42325e4374..fc7b7a84a66c9 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/RussianAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/RussianAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final RussianAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SimpleAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/SimpleAnalyzerProvider.java index faa23cfcd3583..759cfd00a5569 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/SimpleAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/SimpleAnalyzerProvider.java @@ -27,7 +27,7 @@ /** * */ -public class SimpleAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class SimpleAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final SimpleAnalyzer simpleAnalyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java 
b/core/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java index b8e092b53da53..b93ebc7076250 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java @@ -45,7 +45,7 @@ * * */ -public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider { private static final Map DEFAULT_LANGUAGE_STOPWORDS; static { diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SoraniAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/SoraniAnalyzerProvider.java index 388f5bcdbff4a..d51693b897344 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/SoraniAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/SoraniAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * Provider for {@link SoraniAnalyzer} */ -public class SoraniAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class SoraniAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final SoraniAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java index e8afd7b47956f..6c4ca61141de0 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final SpanishAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/StandardAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/StandardAnalyzerProvider.java index 
00300be48967a..1cc04d145073e 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/StandardAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/StandardAnalyzerProvider.java @@ -19,10 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.util.CharArraySet; -import org.elasticsearch.Version; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; @@ -30,7 +28,7 @@ /** * */ -public class StandardAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class StandardAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final StandardAnalyzer standardAnalyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzerProvider.java index a3c65b0a17bdf..51912fb1177cc 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzerProvider.java @@ -19,9 +19,7 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.util.CharArraySet; -import org.elasticsearch.Version; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; @@ -29,7 +27,7 @@ /** * */ -public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final StandardHtmlStripAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java 
b/core/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java index cb1c4b8f5c52e..52fd2fdaaf51b 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final StopAnalyzer stopAnalyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java index a0e81f29cb988..4a281678ea242 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final SwedishAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/ThaiAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/ThaiAnalyzerProvider.java index cf4b9dbdb1e27..394a385b704b8 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/ThaiAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/ThaiAnalyzerProvider.java @@ -27,7 +27,7 @@ /** * */ -public class ThaiAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class ThaiAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final ThaiAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/TurkishAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/TurkishAnalyzerProvider.java index 0866bea14f233..885a1ffabf520 100644 --- 
a/core/src/main/java/org/elasticsearch/index/analysis/TurkishAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/TurkishAnalyzerProvider.java @@ -28,7 +28,7 @@ /** * */ -public class TurkishAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class TurkishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final TurkishAnalyzer analyzer; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/WhitespaceAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/WhitespaceAnalyzerProvider.java index c74c7a88dd617..9ab611a8a1f2c 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/WhitespaceAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/WhitespaceAnalyzerProvider.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; @@ -27,18 +29,26 @@ /** * */ -public class WhitespaceAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class WhitespaceAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final WhitespaceAnalyzer analyzer; + private final KeywordAnalyzer multiTermAnalyzer; public WhitespaceAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) { super(indexSettings, name, settings); this.analyzer = new WhitespaceAnalyzer(); this.analyzer.setVersion(version); + this.multiTermAnalyzer = new KeywordAnalyzer(); + this.multiTermAnalyzer.setVersion(version); } @Override public WhitespaceAnalyzer get() { return this.analyzer; } + + @Override + public synchronized Analyzer getMultiTerm() { + return multiTermAnalyzer; + } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java 
b/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index c9f5e416f8af6..ae0f80dd751dd 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -173,8 +173,8 @@ public T indexAnalyzer(NamedAnalyzer indexAnalyzer) { return builder; } - public T searchAnalyzer(NamedAnalyzer searchAnalyzer) { - this.fieldType.setSearchAnalyzer(searchAnalyzer); + public T searchAnalyzer(NamedAnalyzer searchAnalyzer, NamedAnalyzer searchMultiTermAnalyzer) { + this.fieldType.setSearchAnalyzer(searchAnalyzer, searchMultiTermAnalyzer); return builder; } @@ -228,7 +228,7 @@ protected void setupFieldType(BuilderContext context) { } if (fieldType.indexAnalyzer() == null && fieldType.tokenized() == false && fieldType.indexOptions() != IndexOptions.NONE) { fieldType.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - fieldType.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + fieldType.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); } boolean defaultDocValues = defaultDocValues(context.indexCreatedVersion()); defaultFieldType.setHasDocValues(defaultDocValues); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 3b4dd3bb5c624..fb5ffec11b909 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -59,6 +59,7 @@ public abstract class MappedFieldType extends FieldType { private boolean docValues; private NamedAnalyzer indexAnalyzer; private NamedAnalyzer searchAnalyzer; + private NamedAnalyzer searchMultiTermAnalyzer; private NamedAnalyzer searchQuoteAnalyzer; private SimilarityProvider similarity; private Object nullValue; @@ -72,6 +73,7 @@ protected MappedFieldType(MappedFieldType ref) { this.docValues = ref.hasDocValues(); this.indexAnalyzer = 
ref.indexAnalyzer(); this.searchAnalyzer = ref.searchAnalyzer(); + this.searchMultiTermAnalyzer = ref.searchMultiTermAnalyzer; this.searchQuoteAnalyzer = ref.searchQuoteAnalyzer(); this.similarity = ref.similarity(); this.nullValue = ref.nullValue(); @@ -121,6 +123,7 @@ public boolean equals(Object o) { Objects.equals(name, fieldType.name) && Objects.equals(indexAnalyzer, fieldType.indexAnalyzer) && Objects.equals(searchAnalyzer, fieldType.searchAnalyzer) && + Objects.equals(searchMultiTermAnalyzer, fieldType.searchMultiTermAnalyzer) && Objects.equals(searchQuoteAnalyzer(), fieldType.searchQuoteAnalyzer()) && Objects.equals(eagerGlobalOrdinals, fieldType.eagerGlobalOrdinals) && Objects.equals(nullValue, fieldType.nullValue) && @@ -130,7 +133,7 @@ public boolean equals(Object o) { @Override public int hashCode() { return Objects.hash(super.hashCode(), name, boost, docValues, indexAnalyzer, searchAnalyzer, searchQuoteAnalyzer, - eagerGlobalOrdinals, similarity == null ? null : similarity.name(), nullValue, nullValueAsString); + searchMultiTermAnalyzer, eagerGlobalOrdinals, similarity == null ? 
null : similarity.name(), nullValue, nullValueAsString); } // norelease: we need to override freeze() and add safety checks that all settings are actually set @@ -260,9 +263,14 @@ public NamedAnalyzer searchAnalyzer() { return searchAnalyzer; } - public void setSearchAnalyzer(NamedAnalyzer analyzer) { + public void setSearchAnalyzer(NamedAnalyzer analyzer, NamedAnalyzer multiTermAnalyzer) { checkIfFrozen(); this.searchAnalyzer = analyzer; + this.searchMultiTermAnalyzer = multiTermAnalyzer; + } + + public NamedAnalyzer searchMultiTermAnalyzer() { + return searchMultiTermAnalyzer; } public NamedAnalyzer searchQuoteAnalyzer() { diff --git a/core/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/core/src/main/java/org/elasticsearch/index/mapper/MapperService.java index 34a0ead059694..b7a8feae2c7e6 100755 --- a/core/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -35,7 +35,6 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.AbstractIndexComponent; -import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.AnalysisService; import org.elasticsearch.index.mapper.Mapper.BuilderContext; @@ -121,6 +120,7 @@ public enum MergeReason { private final MapperAnalyzerWrapper indexAnalyzer; private final MapperAnalyzerWrapper searchAnalyzer; + private final MapperAnalyzerWrapper searchMultiTermAnalyzer; private final MapperAnalyzerWrapper searchQuoteAnalyzer; private volatile Map unmappedFieldTypes = emptyMap(); @@ -138,6 +138,7 @@ public MapperService(IndexSettings indexSettings, AnalysisService analysisServic this.documentParser = new DocumentMapperParser(indexSettings, this, analysisService, similarityService, mapperRegistry, queryShardContextSupplier); this.indexAnalyzer = new 
MapperAnalyzerWrapper(analysisService.defaultIndexAnalyzer(), p -> p.indexAnalyzer()); this.searchAnalyzer = new MapperAnalyzerWrapper(analysisService.defaultSearchAnalyzer(), p -> p.searchAnalyzer()); + this.searchMultiTermAnalyzer = new MapperAnalyzerWrapper(analysisService.defaultSearchMultiTermAnalyzer(), p -> p.searchMultiTermAnalyzer()); this.searchQuoteAnalyzer = new MapperAnalyzerWrapper(analysisService.defaultSearchQuoteAnalyzer(), p -> p.searchQuoteAnalyzer()); this.mapperRegistry = mapperRegistry; @@ -608,6 +609,10 @@ public Analyzer searchAnalyzer() { return this.searchAnalyzer; } + public Analyzer searchMultiTermAnalyzer() { + return searchMultiTermAnalyzer; + } + public Analyzer searchQuoteAnalyzer() { return this.searchQuoteAnalyzer; } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java index 7237381159b74..ecc0f24eb60e8 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java @@ -65,8 +65,6 @@ public static class Defaults { FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); FIELD_TYPE.setTokenized(false); - FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); FIELD_TYPE.freeze(); } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java index 81f890d3e0e33..819383e556f11 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java @@ -126,6 +126,8 @@ public static class TypeParser implements Mapper.TypeParser { CompletionFieldMapper.Builder builder = new 
CompletionFieldMapper.Builder(name); NamedAnalyzer indexAnalyzer = null; NamedAnalyzer searchAnalyzer = null; + NamedAnalyzer searchMultiTermAnalyzer = null; + for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { Map.Entry entry = iterator.next(); String fieldName = entry.getKey(); @@ -138,6 +140,7 @@ public static class TypeParser implements Mapper.TypeParser { iterator.remove(); } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.SEARCH_ANALYZER)) { searchAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString()); + searchMultiTermAnalyzer = parserContext.analysisService().multiTermAnalyzer(name); iterator.remove(); } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.PRESERVE_SEPARATORS)) { builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString())); @@ -166,7 +169,7 @@ public static class TypeParser implements Mapper.TypeParser { } builder.indexAnalyzer(indexAnalyzer); - builder.searchAnalyzer(searchAnalyzer); + builder.searchAnalyzer(searchAnalyzer, searchMultiTermAnalyzer); return builder; } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper2x.java b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper2x.java index 6fffb0e6e2370..64ccbf4dae9a8 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper2x.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper2x.java @@ -173,6 +173,7 @@ public static class TypeParser implements Mapper.TypeParser { CompletionFieldMapper2x.Builder builder = new Builder(name); NamedAnalyzer indexAnalyzer = null; NamedAnalyzer searchAnalyzer = null; + NamedAnalyzer searchMultiTermAnalyzer = null; for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext(); ) { Map.Entry entry = iterator.next(); String fieldName = entry.getKey(); @@ -187,6 +188,7 @@ public static class TypeParser implements Mapper.TypeParser { 
iterator.remove(); } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.SEARCH_ANALYZER)) { searchAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString()); + searchMultiTermAnalyzer = parserContext.analysisService().multiTermAnalyzer(name); iterator.remove(); } else if (fieldName.equals(Fields.PAYLOADS)) { builder.payloads(Boolean.parseBoolean(fieldNode.toString())); @@ -218,7 +220,7 @@ public static class TypeParser implements Mapper.TypeParser { searchAnalyzer = indexAnalyzer; } builder.indexAnalyzer(indexAnalyzer); - builder.searchAnalyzer(searchAnalyzer); + builder.searchAnalyzer(searchAnalyzer, searchMultiTermAnalyzer); return builder; } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java index e8b91f9fbd6e6..33cff22be518e 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java @@ -22,13 +22,9 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; -import org.elasticsearch.common.Nullable; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; @@ -47,7 +43,6 @@ import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.ParseContext; import org.elasticsearch.index.mapper.internal.AllFieldMapper; -import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.util.Arrays; @@ -120,12 +115,6 @@ public 
StringFieldType fieldType() { return (StringFieldType) super.fieldType(); } - @Override - public Builder searchAnalyzer(NamedAnalyzer searchAnalyzer) { - super.searchAnalyzer(searchAnalyzer); - return this; - } - public Builder positionIncrementGap(int positionIncrementGap) { this.positionIncrementGap = positionIncrementGap; return this; @@ -165,8 +154,6 @@ protected void setupFieldType(BuilderContext context) { public StringFieldMapper build(BuilderContext context) { if (positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) { fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), positionIncrementGap)); - fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), positionIncrementGap)); - fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionIncrementGap)); } // if the field is not analyzed, then by default, we should omit norms and have docs only // index options, as probably what the user really wants @@ -326,7 +313,9 @@ public Mapper.Builder parse(String fieldName, Map node, ParserCo builder.fieldType().setIndexAnalyzer(parserContext.analysisService().defaultIndexAnalyzer()); } if (builder.fieldType().searchAnalyzer() == null) { - builder.fieldType().setSearchAnalyzer(parserContext.analysisService().defaultSearchAnalyzer()); + builder.fieldType().setSearchAnalyzer( + parserContext.analysisService().defaultSearchAnalyzer(), + parserContext.analysisService().defaultSearchMultiTermAnalyzer()); } if (builder.fieldType().searchQuoteAnalyzer() == null) { builder.fieldType().setSearchQuoteAnalyzer(parserContext.analysisService().defaultSearchQuoteAnalyzer()); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/TextFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/TextFieldMapper.java index 61102895f8fb5..1466f4db98008 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/TextFieldMapper.java +++ 
b/core/src/main/java/org/elasticsearch/index/mapper/core/TextFieldMapper.java @@ -120,8 +120,6 @@ public Builder fielddataFrequencyFilter(double minFreq, double maxFreq, int minS public TextFieldMapper build(BuilderContext context) { if (positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) { fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), positionIncrementGap)); - fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), positionIncrementGap)); - fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionIncrementGap)); } setupFieldType(context); TextFieldMapper fieldMapper = new TextFieldMapper( @@ -136,7 +134,9 @@ public static class TypeParser implements Mapper.TypeParser { public Mapper.Builder parse(String fieldName, Map node, ParserContext parserContext) throws MapperParsingException { TextFieldMapper.Builder builder = new TextFieldMapper.Builder(fieldName); builder.fieldType().setIndexAnalyzer(parserContext.analysisService().defaultIndexAnalyzer()); - builder.fieldType().setSearchAnalyzer(parserContext.analysisService().defaultSearchAnalyzer()); + builder.fieldType().setSearchAnalyzer( + parserContext.analysisService().defaultSearchAnalyzer(), + parserContext.analysisService().defaultSearchMultiTermAnalyzer()); builder.fieldType().setSearchQuoteAnalyzer(parserContext.analysisService().defaultSearchQuoteAnalyzer()); parseTextField(builder, fieldName, node, parserContext); for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java b/core/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java index 3ada93acccb12..a277b7d7bfeed 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java @@ -22,7 +22,6 @@ import org.apache.lucene.index.IndexOptions; import 
org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.Version; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.joda.FormatDateTimeFormatter; import org.elasticsearch.common.joda.Joda; import org.elasticsearch.common.logging.DeprecationLogger; @@ -36,7 +35,6 @@ import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.object.ObjectMapper; import org.elasticsearch.index.similarity.SimilarityProvider; -import org.elasticsearch.index.similarity.SimilarityService; import java.util.Arrays; import java.util.Collections; @@ -110,6 +108,7 @@ public static void parseNumberField(LegacyNumberFieldMapper.Builder builder, Str private static void parseAnalyzersAndTermVectors(FieldMapper.Builder builder, String name, Map fieldNode, Mapper.TypeParser.ParserContext parserContext) { NamedAnalyzer indexAnalyzer = null; NamedAnalyzer searchAnalyzer = null; + NamedAnalyzer searchMultiTermAnalyzer = null; NamedAnalyzer searchQuoteAnalyzer = null; for (Iterator> iterator = fieldNode.entrySet().iterator(); iterator.hasNext();) { @@ -137,6 +136,9 @@ private static void parseAnalyzersAndTermVectors(FieldMapper.Builder builder, St throw new MapperParsingException("analyzer [" + propNode.toString() + "] not found for field [" + name + "]"); } indexAnalyzer = analyzer; + if (searchMultiTermAnalyzer == null) { // give precedence to search_analyzer + searchMultiTermAnalyzer = parserContext.analysisService().multiTermAnalyzer(propNode.toString()); + } iterator.remove(); } else if (propName.equals("search_analyzer")) { NamedAnalyzer analyzer = parserContext.analysisService().analyzer(propNode.toString()); @@ -144,6 +146,7 @@ private static void parseAnalyzersAndTermVectors(FieldMapper.Builder builder, St throw new MapperParsingException("analyzer [" + propNode.toString() + "] not found for field [" + name + "]"); } searchAnalyzer = analyzer; + searchMultiTermAnalyzer = 
parserContext.analysisService().multiTermAnalyzer(propNode.toString()); iterator.remove(); } else if (propName.equals("search_quote_analyzer")) { NamedAnalyzer analyzer = parserContext.analysisService().analyzer(propNode.toString()); @@ -175,7 +178,7 @@ private static void parseAnalyzersAndTermVectors(FieldMapper.Builder builder, St builder.indexAnalyzer(indexAnalyzer); } if (searchAnalyzer != null) { - builder.searchAnalyzer(searchAnalyzer); + builder.searchAnalyzer(searchAnalyzer, searchMultiTermAnalyzer); } if (searchQuoteAnalyzer != null) { builder.searchQuoteAnalyzer(searchQuoteAnalyzer); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java index 02a2cb0c78031..b2f3e54e13d54 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java @@ -118,10 +118,6 @@ public AllFieldMapper build(BuilderContext context) { } else { fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), Defaults.POSITION_INCREMENT_GAP)); - fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), - Defaults.POSITION_INCREMENT_GAP)); - fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), - Defaults.POSITION_INCREMENT_GAP)); } fieldType.setTokenized(true); @@ -135,7 +131,9 @@ public MetadataFieldMapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { Builder builder = new Builder(parserContext.mapperService().fullName(NAME)); builder.fieldType().setIndexAnalyzer(parserContext.analysisService().defaultIndexAnalyzer()); - builder.fieldType().setSearchAnalyzer(parserContext.analysisService().defaultSearchAnalyzer()); + builder.fieldType().setSearchAnalyzer( + parserContext.analysisService().defaultSearchAnalyzer(), + 
parserContext.analysisService().defaultSearchMultiTermAnalyzer()); builder.fieldType().setSearchQuoteAnalyzer(parserContext.analysisService().defaultSearchQuoteAnalyzer()); // parseField below will happily parse the doc_values setting, but it is then never passed to diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java index 06dadf7c4e6e1..4e64ed443cc79 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java @@ -67,7 +67,7 @@ public static class Defaults { FIELD_TYPE.setStored(false); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); FIELD_TYPE.setName(NAME); FIELD_TYPE.freeze(); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java index 67bacf93e86c0..b2f8dd5613ec2 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java @@ -72,7 +72,7 @@ public static class Defaults { FIELD_TYPE.setStored(false); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); FIELD_TYPE.setName(NAME); FIELD_TYPE.freeze(); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java index f56b04c92aa01..d0e4f472700c3 100644 --- 
a/core/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java @@ -63,7 +63,7 @@ public static class Defaults { FIELD_TYPE.setStored(false); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); FIELD_TYPE.setName(NAME); FIELD_TYPE.freeze(); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java index 031732f4d11c1..14cc0cdbb9115 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java @@ -57,7 +57,7 @@ public static class Defaults { FIELD_TYPE.setStored(true); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); FIELD_TYPE.setName(NAME); FIELD_TYPE.freeze(); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java index ce30490968c30..3abf0ded5807c 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java @@ -72,7 +72,7 @@ public static class Defaults { FIELD_TYPE.setStored(true); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); FIELD_TYPE.setName(NAME); 
FIELD_TYPE.freeze(); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/TTLFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/TTLFieldMapper.java index ec1024c07f54a..ea30ba0f30965 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/TTLFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/TTLFieldMapper.java @@ -63,7 +63,7 @@ public static class Defaults extends LegacyLongFieldMapper.Defaults { TTL_FIELD_TYPE.setNumericPrecisionStep(Defaults.PRECISION_STEP_64_BIT); TTL_FIELD_TYPE.setName(NAME); TTL_FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - TTL_FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + TTL_FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); TTL_FIELD_TYPE.freeze(); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/TimestampFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/TimestampFieldMapper.java index 24a86b11392e8..de7245e4a27e0 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/TimestampFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/TimestampFieldMapper.java @@ -67,7 +67,7 @@ public static class Defaults extends LegacyDateFieldMapper.Defaults { FIELD_TYPE.setName(NAME); FIELD_TYPE.setDateTimeFormatter(DATE_TIME_FORMATTER); FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); FIELD_TYPE.setHasDocValues(true); FIELD_TYPE.freeze(); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java index d882be8e9d7cb..d462ea04d62af 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java +++ 
b/core/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java @@ -72,7 +72,7 @@ public static class Defaults { FIELD_TYPE.setStored(false); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); FIELD_TYPE.setName(NAME); FIELD_TYPE.freeze(); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java index e020864c7e85d..034459e299ea0 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java @@ -62,7 +62,7 @@ public static class Defaults { FIELD_TYPE.setStored(true); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); FIELD_TYPE.setName(NAME); FIELD_TYPE.freeze(); diff --git a/core/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java index 692c157c1b188..23a1735ae92f5 100644 --- a/core/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java @@ -382,11 +382,7 @@ protected Query doToQuery(QueryShardContext context) throws IOException { Analyzer analyzerObj; if (analyzer == null) { - if (fieldType != null) { - analyzerObj = context.getSearchAnalyzer(fieldType); - } else { - analyzerObj = context.getMapperService().searchAnalyzer(); - } + analyzerObj = context.getMapperService().searchAnalyzer(); } else { analyzerObj = context.getMapperService().analysisService().analyzer(analyzer); if 
(analyzerObj == null) { diff --git a/core/src/main/java/org/elasticsearch/index/query/QueryShardContext.java b/core/src/main/java/org/elasticsearch/index/query/QueryShardContext.java index 56a75a691387d..f30344d97a03a 100644 --- a/core/src/main/java/org/elasticsearch/index/query/QueryShardContext.java +++ b/core/src/main/java/org/elasticsearch/index/query/QueryShardContext.java @@ -26,7 +26,6 @@ import java.util.Collection; import java.util.HashMap; import java.util.Map; -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.MapperQueryParser; import org.apache.lucene.queryparser.classic.QueryParserSettings; @@ -192,28 +191,6 @@ public ObjectMapper getObjectMapper(String name) { return mapperService.getObjectMapper(name); } - /** - * Gets the search analyzer for the given field, or the default if there is none present for the field - * TODO: remove this by moving defaults into mappers themselves - */ - public Analyzer getSearchAnalyzer(MappedFieldType fieldType) { - if (fieldType.searchAnalyzer() != null) { - return fieldType.searchAnalyzer(); - } - return getMapperService().searchAnalyzer(); - } - - /** - * Gets the search quote analyzer for the given field, or the default if there is none present for the field - * TODO: remove this by moving defaults into mappers themselves - */ - public Analyzer getSearchQuoteAnalyzer(MappedFieldType fieldType) { - if (fieldType.searchQuoteAnalyzer() != null) { - return fieldType.searchQuoteAnalyzer(); - } - return getMapperService().searchQuoteAnalyzer(); - } - public void setAllowUnmappedFields(boolean allowUnmappedFields) { this.allowUnmappedFields = allowUnmappedFields; } diff --git a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java index c390507a78573..3c61a688a4710 100644 --- 
a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java @@ -63,7 +63,6 @@ public class QueryStringQueryBuilder extends AbstractQueryBuildertrue. + * @deprecated this is now computed automatically based on the analysis chain */ + @Deprecated public QueryStringQueryBuilder lowercaseExpandedTerms(boolean lowercaseExpandedTerms) { - this.lowercaseExpandedTerms = lowercaseExpandedTerms; return this; } + /** + * @deprecated this is now computed automatically based on the analysis chain + */ + @Deprecated public boolean lowercaseExpandedTerms() { - return this.lowercaseExpandedTerms; + return false; } /** @@ -484,7 +481,7 @@ public QueryStringQueryBuilder analyzeWildcard(Boolean analyzeWildcard) { } public Boolean analyzeWildcard() { - return this.analyzeWildcard; + return analyzeWildcard; } public QueryStringQueryBuilder rewrite(String rewrite) { @@ -530,13 +527,22 @@ public Boolean lenient() { return this.lenient; } + /** + * @deprecated This is deprecated in favour of setting the language on the `lowercase` filter + * in the analysis chain + */ + @Deprecated public QueryStringQueryBuilder locale(Locale locale) { - this.locale = locale == null ? DEFAULT_LOCALE : locale; return this; } + /** + * @deprecated This is deprecated in favour of setting the language on the `lowercase` filter + * in the analysis chain.
+ */ + @Deprecated public Locale locale() { - return this.locale; + return Locale.ROOT; } /** @@ -599,7 +605,6 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep if (this.allowLeadingWildcard != null) { builder.field(ALLOW_LEADING_WILDCARD_FIELD.getPreferredName(), this.allowLeadingWildcard); } - builder.field(LOWERCASE_EXPANDED_TERMS_FIELD.getPreferredName(), this.lowercaseExpandedTerms); builder.field(ENABLE_POSITION_INCREMENTS_FIELD.getPreferredName(), this.enablePositionIncrements); this.fuzziness.toXContent(builder, params); builder.field(FUZZY_PREFIX_LENGTH_FIELD.getPreferredName(), this.fuzzyPrefixLength); @@ -623,7 +628,6 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep if (this.lenient != null) { builder.field(LENIENT_FIELD.getPreferredName(), this.lenient); } - builder.field(LOCALE_FIELD.getPreferredName(), this.locale.toLanguageTag()); if (this.timeZone != null) { builder.field(TIME_ZONE_FIELD.getPreferredName(), this.timeZone.getID()); } @@ -644,7 +648,6 @@ public static Optional fromXContent(QueryParseContext p float boost = AbstractQueryBuilder.DEFAULT_BOOST; boolean autoGeneratePhraseQueries = QueryStringQueryBuilder.DEFAULT_AUTO_GENERATE_PHRASE_QUERIES; int maxDeterminizedStates = QueryStringQueryBuilder.DEFAULT_MAX_DETERMINED_STATES; - boolean lowercaseExpandedTerms = QueryStringQueryBuilder.DEFAULT_LOWERCASE_EXPANDED_TERMS; boolean enablePositionIncrements = QueryStringQueryBuilder.DEFAULT_ENABLE_POSITION_INCREMENTS; boolean escape = QueryStringQueryBuilder.DEFAULT_ESCAPE; boolean useDisMax = QueryStringQueryBuilder.DEFAULT_USE_DIS_MAX; @@ -659,7 +662,6 @@ public static Optional fromXContent(QueryParseContext p Boolean lenient = null; Operator defaultOperator = QueryStringQueryBuilder.DEFAULT_OPERATOR; String timeZone = null; - Locale locale = QueryStringQueryBuilder.DEFAULT_LOCALE; Fuzziness fuzziness = QueryStringQueryBuilder.DEFAULT_FUZZINESS; String fuzzyRewrite = null; String 
rewrite = null; @@ -709,7 +711,7 @@ public static Optional fromXContent(QueryParseContext p } else if (parseContext.getParseFieldMatcher().match(currentFieldName, MAX_DETERMINED_STATES_FIELD)) { maxDeterminizedStates = parser.intValue(); } else if (parseContext.getParseFieldMatcher().match(currentFieldName, LOWERCASE_EXPANDED_TERMS_FIELD)) { - lowercaseExpandedTerms = parser.booleanValue(); + // ignored } else if (parseContext.getParseFieldMatcher().match(currentFieldName, ENABLE_POSITION_INCREMENTS_FIELD)) { enablePositionIncrements = parser.booleanValue(); } else if (parseContext.getParseFieldMatcher().match(currentFieldName, ESCAPE_FIELD)) { @@ -741,8 +743,7 @@ public static Optional fromXContent(QueryParseContext p } else if (parseContext.getParseFieldMatcher().match(currentFieldName, LENIENT_FIELD)) { lenient = parser.booleanValue(); } else if (parseContext.getParseFieldMatcher().match(currentFieldName, LOCALE_FIELD)) { - String localeStr = parser.text(); - locale = Locale.forLanguageTag(localeStr); + // ignore } else if (parseContext.getParseFieldMatcher().match(currentFieldName, TIME_ZONE_FIELD)) { try { timeZone = parser.text(); @@ -774,7 +775,6 @@ public static Optional fromXContent(QueryParseContext p queryStringQuery.allowLeadingWildcard(allowLeadingWildcard); queryStringQuery.autoGeneratePhraseQueries(autoGeneratePhraseQueries); queryStringQuery.maxDeterminizedStates(maxDeterminizedStates); - queryStringQuery.lowercaseExpandedTerms(lowercaseExpandedTerms); queryStringQuery.enablePositionIncrements(enablePositionIncrements); queryStringQuery.escape(escape); queryStringQuery.useDisMax(useDisMax); @@ -790,7 +790,6 @@ public static Optional fromXContent(QueryParseContext p queryStringQuery.quoteFieldSuffix(quoteFieldSuffix); queryStringQuery.lenient(lenient); queryStringQuery.timeZone(timeZone); - queryStringQuery.locale(locale); queryStringQuery.boost(boost); queryStringQuery.queryName(queryName); return Optional.of(queryStringQuery); @@ -812,10 +811,8 @@ 
protected boolean doEquals(QueryStringQueryBuilder other) { Objects.equals(quoteFieldSuffix, other.quoteFieldSuffix) && Objects.equals(autoGeneratePhraseQueries, other.autoGeneratePhraseQueries) && Objects.equals(allowLeadingWildcard, other.allowLeadingWildcard) && - Objects.equals(lowercaseExpandedTerms, other.lowercaseExpandedTerms) && Objects.equals(enablePositionIncrements, other.enablePositionIncrements) && Objects.equals(analyzeWildcard, other.analyzeWildcard) && - Objects.equals(locale.toLanguageTag(), other.locale.toLanguageTag()) && Objects.equals(fuzziness, other.fuzziness) && Objects.equals(fuzzyPrefixLength, other.fuzzyPrefixLength) && Objects.equals(fuzzyMaxExpansions, other.fuzzyMaxExpansions) && @@ -835,8 +832,8 @@ protected boolean doEquals(QueryStringQueryBuilder other) { @Override protected int doHashCode() { return Objects.hash(queryString, defaultField, fieldsAndWeights, defaultOperator, analyzer, quoteAnalyzer, - quoteFieldSuffix, autoGeneratePhraseQueries, allowLeadingWildcard, lowercaseExpandedTerms, - enablePositionIncrements, analyzeWildcard, locale.toLanguageTag(), fuzziness, fuzzyPrefixLength, + quoteFieldSuffix, autoGeneratePhraseQueries, allowLeadingWildcard, + enablePositionIncrements, analyzeWildcard, fuzziness, fuzzyPrefixLength, fuzzyMaxExpansions, fuzzyRewrite, phraseSlop, useDisMax, tieBreaker, rewrite, minimumShouldMatch, lenient, timeZone == null ? 0 : timeZone.getID(), escape, maxDeterminizedStates); } @@ -845,12 +842,7 @@ protected int doHashCode() { protected Query doToQuery(QueryShardContext context) throws IOException { //TODO would be nice to have all the settings in one place: some change though at query execution time //e.g. field names get expanded to concrete names, defaults get resolved sometimes to settings values etc. 
- QueryParserSettings qpSettings; - if (this.escape) { - qpSettings = new QueryParserSettings(org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString)); - } else { - qpSettings = new QueryParserSettings(this.queryString); - } + QueryParserSettings qpSettings = new QueryParserSettings(); qpSettings.defaultField(this.defaultField == null ? context.defaultField() : this.defaultField); Map resolvedFields = new TreeMap<>(); for (Map.Entry fieldsEntry : fieldsAndWeights.entrySet()) { @@ -868,33 +860,34 @@ protected Query doToQuery(QueryShardContext context) throws IOException { qpSettings.defaultOperator(defaultOperator.toQueryParserOperator()); if (analyzer == null) { - qpSettings.defaultAnalyzer(context.getMapperService().searchAnalyzer()); + qpSettings.analyzer( + context.getMapperService().searchAnalyzer(), + context.getMapperService().searchMultiTermAnalyzer()); } else { NamedAnalyzer namedAnalyzer = context.getAnalysisService().analyzer(analyzer); if (namedAnalyzer == null) { throw new QueryShardException(context, "[query_string] analyzer [" + analyzer + "] not found"); } - qpSettings.forceAnalyzer(namedAnalyzer); + NamedAnalyzer multiTermAnalyzer = context.getAnalysisService().multiTermAnalyzer(analyzer); + qpSettings.analyzer(namedAnalyzer, multiTermAnalyzer); } if (quoteAnalyzer != null) { NamedAnalyzer namedAnalyzer = context.getAnalysisService().analyzer(quoteAnalyzer); if (namedAnalyzer == null) { throw new QueryShardException(context, "[query_string] quote_analyzer [" + quoteAnalyzer + "] not found"); } - qpSettings.forceQuoteAnalyzer(namedAnalyzer); + qpSettings.quoteAnalyzer(namedAnalyzer); } else if (analyzer != null) { - qpSettings.forceQuoteAnalyzer(qpSettings.analyzer()); + qpSettings.quoteAnalyzer(qpSettings.analyzer()); } else { - qpSettings.defaultQuoteAnalyzer(context.getMapperService().searchQuoteAnalyzer()); + qpSettings.quoteAnalyzer(context.getMapperService().searchQuoteAnalyzer()); } 
qpSettings.quoteFieldSuffix(quoteFieldSuffix); qpSettings.autoGeneratePhraseQueries(autoGeneratePhraseQueries); qpSettings.allowLeadingWildcard(allowLeadingWildcard == null ? context.queryStringAllowLeadingWildcard() : allowLeadingWildcard); qpSettings.analyzeWildcard(analyzeWildcard == null ? context.queryStringAnalyzeWildcard() : analyzeWildcard); - qpSettings.lowercaseExpandedTerms(lowercaseExpandedTerms); qpSettings.enablePositionIncrements(enablePositionIncrements); - qpSettings.locale(locale); qpSettings.fuzziness(fuzziness); qpSettings.fuzzyPrefixLength(fuzzyPrefixLength); qpSettings.fuzzyMaxExpansions(fuzzyMaxExpansions); diff --git a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java b/core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java index 151e924ad163a..d6dfc2bed12d2 100644 --- a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java +++ b/core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java @@ -33,7 +33,6 @@ import org.elasticsearch.index.mapper.MappedFieldType; import java.io.IOException; -import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.List; @@ -46,12 +45,14 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.SimpleQueryParser { private final Settings settings; + private final Analyzer multiTermAnalyzer; private QueryShardContext context; /** Creates a new parser with custom flags used to enable/disable certain features. 
*/ - public SimpleQueryParser(Analyzer analyzer, Map weights, int flags, + public SimpleQueryParser(Analyzer analyzer, Analyzer multiTermAnalyzer, Map weights, int flags, Settings settings, QueryShardContext context) { super(analyzer, weights, flags); + this.multiTermAnalyzer = multiTermAnalyzer; this.settings = settings; this.context = context; } @@ -93,25 +94,48 @@ public Query newDefaultQuery(String text) { } /** - * Dispatches to Lucene's SimpleQueryParser's newFuzzyQuery, optionally - * lowercasing the term first + * Dispatches to Lucene's SimpleQueryParser's newFuzzyQuery. */ @Override public Query newFuzzyQuery(String text, int fuzziness) { - if (settings.lowercaseExpandedTerms()) { - text = text.toLowerCase(settings.locale()); + final Analyzer oldAnalyzer = getAnalyzer(); + try { + setAnalyzer(multiTermAnalyzer); + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.setDisableCoord(true); + for (Map.Entry entry : weights.entrySet()) { + final String field = entry.getKey(); + try { + String analyzedText = analyzeSingleToken(field, text); + Query query = new FuzzyQuery(new Term(field, analyzedText), fuzziness); + bq.add(wrapWithBoost(query, entry.getValue()), BooleanClause.Occur.SHOULD); + } catch (RuntimeException e) { + rethrowUnlessLenient(e); + } + } + return super.simplify(bq.build()); + } finally { + setAnalyzer(oldAnalyzer); } - BooleanQuery.Builder bq = new BooleanQuery.Builder(); - bq.setDisableCoord(true); - for (Map.Entry entry : weights.entrySet()) { - try { - Query query = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness); - bq.add(wrapWithBoost(query, entry.getValue()), BooleanClause.Occur.SHOULD); - } catch (RuntimeException e) { - rethrowUnlessLenient(e); + } + + private String analyzeSingleToken(String field, String value) { + try (TokenStream tk = getAnalyzer().tokenStream(field, value)) { + CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class); + tk.reset(); + final String term; + if (tk.incrementToken()) { + 
term = termAtt.toString(); + } else { + throw new IllegalStateException("Expected 1 token but got 0"); + } + if (tk.incrementToken()) { + throw new IllegalStateException("Expected 1 token but got 2 or more"); } + return term; + } catch (IOException e) { + throw new IllegalStateException("Cannot happen", e); } - return super.simplify(bq.build()); } @Override @@ -133,31 +157,28 @@ public Query newPhraseQuery(String text, int slop) { /** * Dispatches to Lucene's SimpleQueryParser's newPrefixQuery, optionally - * lowercasing the term first or trying to analyze terms + * trying to analyze terms. */ @Override public Query newPrefixQuery(String text) { - if (settings.lowercaseExpandedTerms()) { - text = text.toLowerCase(settings.locale()); - } - BooleanQuery.Builder bq = new BooleanQuery.Builder(); - bq.setDisableCoord(true); - for (Map.Entry entry : weights.entrySet()) { - try { - if (settings.analyzeWildcard()) { + final Analyzer oldAnalyzer = getAnalyzer(); + try { + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.setDisableCoord(true); + for (Map.Entry entry : weights.entrySet()) { + try { Query analyzedQuery = newPossiblyAnalyzedQuery(entry.getKey(), text); if (analyzedQuery != null) { bq.add(wrapWithBoost(analyzedQuery, entry.getValue()), BooleanClause.Occur.SHOULD); } - } else { - Query query = new PrefixQuery(new Term(entry.getKey(), text)); - bq.add(wrapWithBoost(query, entry.getValue()), BooleanClause.Occur.SHOULD); + } catch (RuntimeException e) { + return rethrowUnlessLenient(e); } - } catch (RuntimeException e) { - return rethrowUnlessLenient(e); } + return super.simplify(bq.build()); + } finally { + setAnalyzer(oldAnalyzer); } - return super.simplify(bq.build()); } private static Query wrapWithBoost(Query query, float boost) { @@ -173,6 +194,9 @@ private static Query wrapWithBoost(Query query, float boost) { * of {@code TermQuery}s and {@code PrefixQuery}s */ private Query newPossiblyAnalyzedQuery(String field, String termStr) { + if 
(settings.analyzeWildcard() == false) { + setAnalyzer(multiTermAnalyzer); + } List> tlist = new ArrayList<> (); // get Analyzer from superclass and tokenize the term try (TokenStream source = getAnalyzer().tokenStream(field, termStr)) { @@ -248,10 +272,6 @@ private Query newPossiblyAnalyzedQuery(String field, String termStr) { * their default values */ static class Settings { - /** Locale to use for parsing. */ - private Locale locale = SimpleQueryStringBuilder.DEFAULT_LOCALE; - /** Specifies whether parsed terms should be lowercased. */ - private boolean lowercaseExpandedTerms = SimpleQueryStringBuilder.DEFAULT_LOWERCASE_EXPANDED_TERMS; /** Specifies whether lenient query parsing should be used. */ private boolean lenient = SimpleQueryStringBuilder.DEFAULT_LENIENT; /** Specifies whether wildcards should be analyzed. */ @@ -264,36 +284,11 @@ static class Settings { public Settings() { } - public Settings(Locale locale, Boolean lowercaseExpandedTerms, Boolean lenient, Boolean analyzeWildcard) { - this.locale = locale; - this.lowercaseExpandedTerms = lowercaseExpandedTerms; + public Settings(Boolean lenient, Boolean analyzeWildcard) { this.lenient = lenient; this.analyzeWildcard = analyzeWildcard; } - /** Specifies the locale to use for parsing, Locale.ROOT by default. */ - public void locale(Locale locale) { - this.locale = (locale != null) ? locale : SimpleQueryStringBuilder.DEFAULT_LOCALE; - } - - /** Returns the locale to use for parsing. */ - public Locale locale() { - return this.locale; - } - - /** - * Specifies whether to lowercase parse terms, defaults to true if - * unset. - */ - public void lowercaseExpandedTerms(boolean lowercaseExpandedTerms) { - this.lowercaseExpandedTerms = lowercaseExpandedTerms; - } - - /** Returns whether to lowercase parse terms. */ - public boolean lowercaseExpandedTerms() { - return this.lowercaseExpandedTerms; - } - /** Specifies whether to use lenient parsing, defaults to false. 
*/ public void lenient(boolean lenient) { this.lenient = lenient; @@ -316,10 +311,7 @@ public boolean analyzeWildcard() { @Override public int hashCode() { - // checking the return value of toLanguageTag() for locales only. - // For further reasoning see - // https://issues.apache.org/jira/browse/LUCENE-4021 - return Objects.hash(locale.toLanguageTag(), lowercaseExpandedTerms, lenient, analyzeWildcard); + return Objects.hash(lenient, analyzeWildcard); } @Override @@ -332,13 +324,8 @@ public boolean equals(Object obj) { } Settings other = (Settings) obj; - // checking the return value of toLanguageTag() for locales only. - // For further reasoning see - // https://issues.apache.org/jira/browse/LUCENE-4021 - return (Objects.equals(locale.toLanguageTag(), other.locale.toLanguageTag()) - && Objects.equals(lowercaseExpandedTerms, other.lowercaseExpandedTerms) - && Objects.equals(lenient, other.lenient) - && Objects.equals(analyzeWildcard, other.analyzeWildcard)); + return Objects.equals(lenient, other.lenient) + && Objects.equals(analyzeWildcard, other.analyzeWildcard); } } } diff --git a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java b/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java index 0bedf67820bb1..622620fbef81b 100644 --- a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java @@ -78,10 +78,6 @@ * > online documentation. 
*/ public class SimpleQueryStringBuilder extends AbstractQueryBuilder { - /** Default locale used for parsing.*/ - public static final Locale DEFAULT_LOCALE = Locale.ROOT; - /** Default for lowercasing parsed terms.*/ - public static final boolean DEFAULT_LOWERCASE_EXPANDED_TERMS = true; /** Default for using lenient query parsing.*/ public static final boolean DEFAULT_LENIENT = false; /** Default for wildcard analysis.*/ @@ -98,8 +94,10 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder fromXContent(QueryParseContext String analyzerName = null; int flags = SimpleQueryStringFlag.ALL.value(); boolean lenient = SimpleQueryStringBuilder.DEFAULT_LENIENT; - boolean lowercaseExpandedTerms = SimpleQueryStringBuilder.DEFAULT_LOWERCASE_EXPANDED_TERMS; boolean analyzeWildcard = SimpleQueryStringBuilder.DEFAULT_ANALYZE_WILDCARD; - Locale locale = null; XContentParser.Token token; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { @@ -484,10 +486,9 @@ public static Optional fromXContent(QueryParseContext } } } else if (parseContext.getParseFieldMatcher().match(currentFieldName, LOCALE_FIELD)) { - String localeStr = parser.text(); - locale = Locale.forLanguageTag(localeStr); + // ignore } else if (parseContext.getParseFieldMatcher().match(currentFieldName, LOWERCASE_EXPANDED_TERMS_FIELD)) { - lowercaseExpandedTerms = parser.booleanValue(); + // ignore } else if (parseContext.getParseFieldMatcher().match(currentFieldName, LENIENT_FIELD)) { lenient = parser.booleanValue(); } else if (parseContext.getParseFieldMatcher().match(currentFieldName, ANALYZE_WILDCARD_FIELD)) { @@ -513,7 +514,7 @@ public static Optional fromXContent(QueryParseContext SimpleQueryStringBuilder qb = new SimpleQueryStringBuilder(queryBody); qb.boost(boost).fields(fieldsAndWeights).analyzer(analyzerName).queryName(queryName).minimumShouldMatch(minimumShouldMatch); - 
qb.flags(flags).defaultOperator(defaultOperator).locale(locale).lowercaseExpandedTerms(lowercaseExpandedTerms); + qb.flags(flags).defaultOperator(defaultOperator); qb.lenient(lenient).analyzeWildcard(analyzeWildcard).boost(boost); return Optional.of(qb); } diff --git a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 47585c2210dca..f9d859506e7fa 100644 --- a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -199,8 +199,8 @@ public void setZeroTermsQuery(ZeroTermsQuery zeroTermsQuery) { protected Analyzer getAnalyzer(MappedFieldType fieldType) { if (this.analyzer == null) { - if (fieldType != null) { - return context.getSearchAnalyzer(fieldType); + if (fieldType != null && fieldType.searchAnalyzer() != null) { + return fieldType.searchAnalyzer(); } return context.getMapperService().searchAnalyzer(); } else { diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java index 6d5c3a8ed1873..d8e22094dfe4f 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java @@ -19,6 +19,8 @@ package org.elasticsearch.indices.analysis; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.apache.lucene.analysis.br.BrazilianAnalyzer; @@ -26,12 +28,15 @@ import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.ckb.SoraniAnalyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.analysis.core.KeywordTokenizer; +import 
org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.SimpleAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.apache.lucene.analysis.da.DanishAnalyzer; import org.apache.lucene.analysis.de.GermanAnalyzer; +import org.apache.lucene.analysis.de.GermanNormalizationFilter; import org.apache.lucene.analysis.el.GreekAnalyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.es.SpanishAnalyzer; @@ -55,10 +60,12 @@ import org.apache.lucene.analysis.ru.RussianAnalyzer; import org.apache.lucene.analysis.standard.ClassicAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.th.ThaiAnalyzer; import org.apache.lucene.analysis.tr.TurkishAnalyzer; import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.analysis.util.ElisionFilter; import org.elasticsearch.Version; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.analysis.PatternAnalyzer; @@ -80,13 +87,23 @@ protected Analyzer create(Version version) { a.setVersion(version.luceneVersion); return a; } + @Override + protected Analyzer createMultiTerm(Version version) { + return new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + final Tokenizer source = new KeywordTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + return new TokenStreamComponents(source, result); + } + }; + } }, DEFAULT(CachingStrategy.ELASTICSEARCH){ @Override protected Analyzer create(Version version) { - // by calling get analyzer we are ensuring reuse of the same STANDARD analyzer for DEFAULT! 
- // this call does not create a new instance return STANDARD.getAnalyzer(version); } }, @@ -96,6 +113,10 @@ protected Analyzer create(Version version) { protected Analyzer create(Version version) { return new KeywordAnalyzer(); } + @Override + protected Analyzer createMultiTerm(Version version) { + return create(version); + } }, STOP { @@ -114,6 +135,10 @@ protected Analyzer create(Version version) { a.setVersion(version.luceneVersion); return a; } + @Override + protected Analyzer createMultiTerm(Version version) { + return KEYWORD.createMultiTerm(version); + } }, SIMPLE { @@ -283,6 +308,19 @@ protected Analyzer create(Version version) { a.setVersion(version.luceneVersion); return a; } + @Override + protected Analyzer createMultiTerm(Version version) { + return new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + final Tokenizer source = new KeywordTokenizer(); + TokenStream result = new StandardFilter(source); + result = new ElisionFilter(result, FrenchAnalyzer.DEFAULT_ARTICLES); + result = new LowerCaseFilter(result); + return new TokenStreamComponents(source, result); + } + }; + } }, GALICIAN { @@ -301,6 +339,19 @@ protected Analyzer create(Version version) { a.setVersion(version.luceneVersion); return a; } + @Override + protected Analyzer createMultiTerm(Version version) { + return new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + final Tokenizer source = new KeywordTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new GermanNormalizationFilter(result); + return new TokenStreamComponents(source, result); + } + }; + } }, GREEK { @@ -467,7 +518,13 @@ protected Analyzer create(Version version) { abstract protected Analyzer create(Version version); + protected Analyzer createMultiTerm(Version version) { + // default impl, should work fine for standard and most european languages + return 
STANDARD.createMultiTerm(version); + } + protected final PreBuiltCacheFactory.PreBuiltCache cache; + protected final PreBuiltCacheFactory.PreBuiltCache multiTermCache; PreBuiltAnalyzers() { this(PreBuiltCacheFactory.CachingStrategy.LUCENE); @@ -475,6 +532,7 @@ protected Analyzer create(Version version) { PreBuiltAnalyzers(PreBuiltCacheFactory.CachingStrategy cachingStrategy) { cache = PreBuiltCacheFactory.getCache(cachingStrategy); + multiTermCache = PreBuiltCacheFactory.getCache(cachingStrategy); } PreBuiltCacheFactory.PreBuiltCache getCache() { @@ -491,6 +549,16 @@ public synchronized Analyzer getAnalyzer(Version version) { return analyzer; } + public synchronized Analyzer getMultiTermAnalyzer(Version version) { + Analyzer analyzer = multiTermCache.get(version); + if (analyzer == null) { + analyzer = this.createMultiTerm(version); + multiTermCache.put(version, analyzer); + } + + return analyzer; + } + /** * Get a pre built Analyzer by its name or fallback to the default one * @param name Analyzer name diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java index 027c75073f6f0..e14b5c9b1d06b 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java @@ -65,6 +65,7 @@ import org.elasticsearch.Version; import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory; import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory; +import org.elasticsearch.index.analysis.MultiTermAwareComponent; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy; import org.tartarus.snowball.ext.DutchStemmer; @@ -77,7 +78,7 @@ */ public enum PreBuiltTokenFilters { - WORD_DELIMITER(CachingStrategy.ONE) { + WORD_DELIMITER(CachingStrategy.ONE, false) 
{ @Override public TokenStream create(TokenStream tokenStream, Version version) { return new WordDelimiterFilter(tokenStream, @@ -89,112 +90,112 @@ public TokenStream create(TokenStream tokenStream, Version version) { } }, - STOP(CachingStrategy.LUCENE) { + STOP(CachingStrategy.LUCENE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new StopFilter(tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET); } }, - TRIM(CachingStrategy.LUCENE) { + TRIM(CachingStrategy.LUCENE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new TrimFilter(tokenStream); } }, - REVERSE(CachingStrategy.LUCENE) { + REVERSE(CachingStrategy.LUCENE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ReverseStringFilter(tokenStream); } }, - ASCIIFOLDING(CachingStrategy.ONE) { + ASCIIFOLDING(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ASCIIFoldingFilter(tokenStream); } }, - LENGTH(CachingStrategy.LUCENE) { + LENGTH(CachingStrategy.LUCENE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new LengthFilter(tokenStream, 0, Integer.MAX_VALUE); } }, - COMMON_GRAMS(CachingStrategy.LUCENE) { + COMMON_GRAMS(CachingStrategy.LUCENE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new CommonGramsFilter(tokenStream, CharArraySet.EMPTY_SET); } }, - LOWERCASE(CachingStrategy.LUCENE) { + LOWERCASE(CachingStrategy.LUCENE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new LowerCaseFilter(tokenStream); } }, - UPPERCASE(CachingStrategy.LUCENE) { + UPPERCASE(CachingStrategy.LUCENE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new UpperCaseFilter(tokenStream); } }, - KSTEM(CachingStrategy.ONE) { + 
KSTEM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new KStemFilter(tokenStream); } }, - PORTER_STEM(CachingStrategy.ONE) { + PORTER_STEM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new PorterStemFilter(tokenStream); } }, - STANDARD(CachingStrategy.LUCENE) { + STANDARD(CachingStrategy.LUCENE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new StandardFilter(tokenStream); } }, - CLASSIC(CachingStrategy.ONE) { + CLASSIC(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ClassicFilter(tokenStream); } }, - NGRAM(CachingStrategy.LUCENE) { + NGRAM(CachingStrategy.LUCENE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new NGramTokenFilter(tokenStream); } }, - EDGE_NGRAM(CachingStrategy.LUCENE) { + EDGE_NGRAM(CachingStrategy.LUCENE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new EdgeNGramTokenFilter(tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE); } }, - UNIQUE(CachingStrategy.ONE) { + UNIQUE(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new UniqueTokenFilter(tokenStream); } }, - TRUNCATE(CachingStrategy.ONE) { + TRUNCATE(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new TruncateTokenFilter(tokenStream, 10); @@ -202,189 +203,189 @@ public TokenStream create(TokenStream tokenStream, Version version) { }, // Extended Token Filters - SNOWBALL(CachingStrategy.ONE) { + SNOWBALL(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new SnowballFilter(tokenStream, 
"English"); } }, - STEMMER(CachingStrategy.ONE) { + STEMMER(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new PorterStemFilter(tokenStream); } }, - ELISION(CachingStrategy.ONE) { + ELISION(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ElisionFilter(tokenStream, FrenchAnalyzer.DEFAULT_ARTICLES); } }, - ARABIC_STEM(CachingStrategy.ONE) { + ARABIC_STEM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ArabicStemFilter(tokenStream); } }, - BRAZILIAN_STEM(CachingStrategy.ONE) { + BRAZILIAN_STEM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new BrazilianStemFilter(tokenStream); } }, - CZECH_STEM(CachingStrategy.ONE) { + CZECH_STEM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new CzechStemFilter(tokenStream); } }, - DUTCH_STEM(CachingStrategy.ONE) { + DUTCH_STEM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new SnowballFilter(tokenStream, new DutchStemmer()); } }, - FRENCH_STEM(CachingStrategy.ONE) { + FRENCH_STEM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new SnowballFilter(tokenStream, new FrenchStemmer()); } }, - GERMAN_STEM(CachingStrategy.ONE) { + GERMAN_STEM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new GermanStemFilter(tokenStream); } }, - RUSSIAN_STEM(CachingStrategy.ONE) { + RUSSIAN_STEM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new SnowballFilter(tokenStream, "Russian"); } }, - KEYWORD_REPEAT(CachingStrategy.ONE) { + 
KEYWORD_REPEAT(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new KeywordRepeatFilter(tokenStream); } }, - ARABIC_NORMALIZATION(CachingStrategy.ONE) { + ARABIC_NORMALIZATION(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ArabicNormalizationFilter(tokenStream); } }, - PERSIAN_NORMALIZATION(CachingStrategy.ONE) { + PERSIAN_NORMALIZATION(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new PersianNormalizationFilter(tokenStream); } }, - TYPE_AS_PAYLOAD(CachingStrategy.ONE) { + TYPE_AS_PAYLOAD(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new TypeAsPayloadTokenFilter(tokenStream); } }, - SHINGLE(CachingStrategy.ONE) { + SHINGLE(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ShingleFilter(tokenStream); } }, - GERMAN_NORMALIZATION(CachingStrategy.ONE) { + GERMAN_NORMALIZATION(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new GermanNormalizationFilter(tokenStream); } }, - HINDI_NORMALIZATION(CachingStrategy.ONE) { + HINDI_NORMALIZATION(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new HindiNormalizationFilter(tokenStream); } }, - INDIC_NORMALIZATION(CachingStrategy.ONE) { + INDIC_NORMALIZATION(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new IndicNormalizationFilter(tokenStream); } }, - SORANI_NORMALIZATION(CachingStrategy.ONE) { + SORANI_NORMALIZATION(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new SoraniNormalizationFilter(tokenStream); } }, - 
SCANDINAVIAN_NORMALIZATION(CachingStrategy.ONE) { + SCANDINAVIAN_NORMALIZATION(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ScandinavianNormalizationFilter(tokenStream); } }, - SCANDINAVIAN_FOLDING(CachingStrategy.ONE) { + SCANDINAVIAN_FOLDING(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ScandinavianFoldingFilter(tokenStream); } }, - APOSTROPHE(CachingStrategy.ONE) { + APOSTROPHE(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new ApostropheFilter(tokenStream); } }, - CJK_WIDTH(CachingStrategy.ONE) { + CJK_WIDTH(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new CJKWidthFilter(tokenStream); } }, - DECIMAL_DIGIT(CachingStrategy.ONE) { + DECIMAL_DIGIT(CachingStrategy.ONE, true) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new DecimalDigitFilter(tokenStream); } }, - CJK_BIGRAM(CachingStrategy.ONE) { + CJK_BIGRAM(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new CJKBigramFilter(tokenStream); } }, - DELIMITED_PAYLOAD_FILTER(CachingStrategy.ONE) { + DELIMITED_PAYLOAD_FILTER(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new DelimitedPayloadTokenFilter(tokenStream, DelimitedPayloadTokenFilterFactory.DEFAULT_DELIMITER, DelimitedPayloadTokenFilterFactory.DEFAULT_ENCODER); } }, - LIMIT(CachingStrategy.ONE) { + LIMIT(CachingStrategy.ONE, false) { @Override public TokenStream create(TokenStream tokenStream, Version version) { return new LimitTokenCountFilter(tokenStream, LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT, LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS); @@ -396,33 +397,54 @@ public 
TokenStream create(TokenStream tokenStream, Version version) { abstract public TokenStream create(TokenStream tokenStream, Version version); protected final PreBuiltCacheFactory.PreBuiltCache cache; + private final boolean multiTermAware; - - PreBuiltTokenFilters(CachingStrategy cachingStrategy) { + PreBuiltTokenFilters(CachingStrategy cachingStrategy, boolean multiTermAware) { cache = PreBuiltCacheFactory.getCache(cachingStrategy); + this.multiTermAware = multiTermAware; } public synchronized TokenFilterFactory getTokenFilterFactory(final Version version) { TokenFilterFactory factory = cache.get(version); if (factory == null) { - final String finalName = name(); - factory = new TokenFilterFactory() { - @Override - public String name() { - return finalName.toLowerCase(Locale.ROOT); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return valueOf(finalName).create(tokenStream, version); - } - }; + if (multiTermAware) { + factory = new MultiTermAwareTokenFilterFactory() { + @Override + public String name() { + return PreBuiltTokenFilters.this.name().toLowerCase(Locale.ROOT); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return PreBuiltTokenFilters.this.create(tokenStream, version); + } + }; + } else { + factory = new TokenFilterFactory() { + @Override + public String name() { + return PreBuiltTokenFilters.this.name().toLowerCase(Locale.ROOT); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return PreBuiltTokenFilters.this.create(tokenStream, version); + } + }; + } cache.put(version, factory); } return factory; } + private static abstract class MultiTermAwareTokenFilterFactory implements TokenFilterFactory, MultiTermAwareComponent { + @Override + public Object getMultiTermComponent() { + return this; + } + } + /** * Get a pre built TokenFilter by its name or fallback to the default one * @param name TokenFilter name diff --git 
a/core/src/main/java/org/elasticsearch/rest/action/support/RestActions.java b/core/src/main/java/org/elasticsearch/rest/action/support/RestActions.java index 66998ee872789..3b3b4a1870f79 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/support/RestActions.java +++ b/core/src/main/java/org/elasticsearch/rest/action/support/RestActions.java @@ -195,7 +195,6 @@ public static QueryBuilder urlParamsToQueryBuilder(RestRequest request) { queryBuilder.defaultField(request.param("df")); queryBuilder.analyzer(request.param("analyzer")); queryBuilder.analyzeWildcard(request.paramAsBoolean("analyze_wildcard", false)); - queryBuilder.lowercaseExpandedTerms(request.paramAsBoolean("lowercase_expanded_terms", true)); queryBuilder.lenient(request.paramAsBoolean("lenient", null)); String defaultOperator = request.param("default_operator"); if (defaultOperator != null) { diff --git a/core/src/test/java/org/apache/lucene/queryparser/classic/MapperQueryParserTests.java b/core/src/test/java/org/apache/lucene/queryparser/classic/MapperQueryParserTests.java new file mode 100644 index 0000000000000..3135a87f1b27d --- /dev/null +++ b/core/src/test/java/org/apache/lucene/queryparser/classic/MapperQueryParserTests.java @@ -0,0 +1,121 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.queryparser.classic; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.test.ESSingleNodeTestCase; + +public class MapperQueryParserTests extends ESSingleNodeTestCase { + + public void testTermQuery() throws ParseException { + IndexService index = createIndex("index", Settings.EMPTY, "type", "field", "type=text,analyzer=english"); + QueryShardContext context = index.newQueryShardContext(); + MapperQueryParser parser = new MapperQueryParser(context); + QueryParserSettings settings = new QueryParserSettings(); + settings.defaultField("field"); + parser.reset(settings); + Query query = parser.parse("Foxes"); + assertEquals(new TermQuery(new Term("field", "fox")), query); // the whole chain was applied + } + + public void testPhraseQuery() throws ParseException { + IndexService index = createIndex("index", Settings.EMPTY, "type", "field", "type=text,analyzer=english"); + QueryShardContext context = index.newQueryShardContext(); + MapperQueryParser parser = new MapperQueryParser(context); + QueryParserSettings settings = new QueryParserSettings(); + settings.defaultField("field"); + parser.reset(settings); + Query query = parser.parse("\"Quick Foxes\""); + assertEquals(new BooleanQuery.Builder() + 
.setDisableCoord(true) + .add(new PhraseQuery("field", "quick", "fox"), Occur.SHOULD) // the whole chain was applied + .build(), + query); + } + + public void testPrefixQuery() throws ParseException { + IndexService index = createIndex("index", Settings.EMPTY, "type", "field", "type=text,analyzer=english"); + QueryShardContext context = index.newQueryShardContext(); + MapperQueryParser parser = new MapperQueryParser(context); + QueryParserSettings settings = new QueryParserSettings(); + settings.defaultField("field"); + parser.reset(settings); + Query query = parser.parse("Tables*"); + assertEquals(new PrefixQuery(new Term("field", "tables")), query); // lowercase was applied but not stemming + + settings.analyzeWildcard(true); + parser.reset(settings); + query = parser.parse("Tables*"); + assertEquals(new PrefixQuery(new Term("field", "tabl")), query); + } + + public void testWildcardQuery() throws ParseException { + IndexService index = createIndex("index", Settings.EMPTY, "type", "field", "type=text,analyzer=english"); + QueryShardContext context = index.newQueryShardContext(); + MapperQueryParser parser = new MapperQueryParser(context); + QueryParserSettings settings = new QueryParserSettings(); + settings.defaultField("field"); + parser.reset(settings); + Query query = parser.parse("Fr*days"); + assertEquals(new WildcardQuery(new Term("field", "fr*days")), query); // lowercase was applied but not stemming + + settings.analyzeWildcard(true); + parser.reset(settings); + query = parser.parse("Fr*days"); + assertEquals(new WildcardQuery(new Term("field", "fr*dai")), query); + } + + public void testFuzzyQuery() throws ParseException { + IndexService index = createIndex("index", Settings.EMPTY, "type", "field", "type=text,analyzer=english"); + QueryShardContext context = index.newQueryShardContext(); + MapperQueryParser parser = new MapperQueryParser(context); + QueryParserSettings settings = new QueryParserSettings(); + settings.defaultField("field"); + 
parser.reset(settings); + Query query = parser.parse("Toys~1"); + assertEquals(new FuzzyQuery(new Term("field", "toys"), 1), query); // lowercase was applied but not stemming + } + + public void testRangeQuery() throws ParseException { + IndexService index = createIndex("index", Settings.EMPTY, "type", "field", "type=text,analyzer=english"); + QueryShardContext context = index.newQueryShardContext(); + MapperQueryParser parser = new MapperQueryParser(context); + QueryParserSettings settings = new QueryParserSettings(); + settings.defaultField("field"); + parser.reset(settings); + Query query = parser.parse("[A TO B]"); + assertEquals(new TermRangeQuery("field", new BytesRef("a"), new BytesRef("b"), + true, true), query); // lowercase was applied but not stemming + } + +} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java index 6893fda75b84e..96ceff04fbf85 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java +++ b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java @@ -19,8 +19,182 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.ar.ArabicNormalizationFilterFactory; +import org.apache.lucene.analysis.ar.ArabicStemFilterFactory; +import org.apache.lucene.analysis.br.BrazilianStemFilterFactory; +import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory; +import org.apache.lucene.analysis.cjk.CJKWidthFilterFactory; +import org.apache.lucene.analysis.ckb.SoraniNormalizationFilterFactory; +import org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory; +import org.apache.lucene.analysis.core.DecimalDigitFilterFactory; +import org.apache.lucene.analysis.core.LetterTokenizerFactory; +import org.apache.lucene.analysis.core.LowerCaseFilterFactory; +import org.apache.lucene.analysis.core.StopFilterFactory; +import 
org.apache.lucene.analysis.core.UpperCaseFilterFactory; +import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory; +import org.apache.lucene.analysis.cz.CzechStemFilterFactory; +import org.apache.lucene.analysis.de.GermanNormalizationFilterFactory; +import org.apache.lucene.analysis.en.KStemFilterFactory; +import org.apache.lucene.analysis.en.PorterStemFilterFactory; +import org.apache.lucene.analysis.fa.PersianNormalizationFilterFactory; +import org.apache.lucene.analysis.hi.HindiNormalizationFilterFactory; +import org.apache.lucene.analysis.in.IndicNormalizationFilterFactory; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory; +import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory; +import org.apache.lucene.analysis.miscellaneous.LengthFilterFactory; +import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory; +import org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory; +import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilterFactory; +import org.apache.lucene.analysis.miscellaneous.TrimFilterFactory; +import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilterFactory; +import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; +import org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory; +import org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory; +import org.apache.lucene.analysis.ngram.NGramFilterFactory; +import org.apache.lucene.analysis.ngram.NGramTokenizerFactory; +import org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory; +import org.apache.lucene.analysis.pattern.PatternTokenizerFactory; +import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory; +import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory; +import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory; +import org.apache.lucene.analysis.shingle.ShingleFilterFactory; +import 
org.apache.lucene.analysis.standard.ClassicFilterFactory; +import org.apache.lucene.analysis.standard.ClassicTokenizerFactory; +import org.apache.lucene.analysis.standard.StandardFilterFactory; +import org.apache.lucene.analysis.standard.StandardTokenizerFactory; +import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory; +import org.apache.lucene.analysis.th.ThaiTokenizerFactory; +import org.apache.lucene.analysis.tr.ApostropheFilterFactory; +import org.apache.lucene.analysis.util.ElisionFilterFactory; import org.elasticsearch.AnalysisFactoryTestCase; +import org.elasticsearch.Version; +import org.elasticsearch.common.collect.MapBuilder; +import org.elasticsearch.indices.analysis.PreBuiltCharFilters; +import org.elasticsearch.indices.analysis.PreBuiltTokenFilters; +import org.elasticsearch.indices.analysis.PreBuiltTokenizers; + +import java.util.Map; public class AnalysisFactoryTests extends AnalysisFactoryTestCase { - // tests are inherited + + static final Map<PreBuiltTokenizers, Class<?>> KNOWN_TOKENIZERS + = new MapBuilder<PreBuiltTokenizers, Class<?>>() + .put(PreBuiltTokenizers.CLASSIC, ClassicTokenizerFactory.class) + .put(PreBuiltTokenizers.EDGE_NGRAM, EdgeNGramTokenizerFactory.class) + .put(PreBuiltTokenizers.KEYWORD, org.apache.lucene.analysis.core.KeywordTokenizerFactory.class) + .put(PreBuiltTokenizers.LETTER, LetterTokenizerFactory.class) + .put(PreBuiltTokenizers.LOWERCASE, Void.class) + .put(PreBuiltTokenizers.NGRAM, NGramTokenizerFactory.class) + .put(PreBuiltTokenizers.PATH_HIERARCHY, PathHierarchyTokenizerFactory.class) + .put(PreBuiltTokenizers.PATTERN, PatternTokenizerFactory.class) + .put(PreBuiltTokenizers.STANDARD, StandardTokenizerFactory.class) + .put(PreBuiltTokenizers.THAI, ThaiTokenizerFactory.class) + .put(PreBuiltTokenizers.UAX_URL_EMAIL, UAX29URLEmailTokenizerFactory.class) + .put(PreBuiltTokenizers.WHITESPACE, WhitespaceTokenizerFactory.class) + .immutableMap(); + + static final Map<PreBuiltCharFilters, Class<?>> KNOWN_CHAR_FILTERS + = new MapBuilder<PreBuiltCharFilters, Class<?>>() + .put(PreBuiltCharFilters.HTML_STRIP,
HTMLStripCharFilterFactory.class) + .immutableMap(); + + static final Map<PreBuiltTokenFilters, Class<?>> KNOWN_TOKEN_FILTERS + = new MapBuilder<PreBuiltTokenFilters, Class<?>>() + .put(PreBuiltTokenFilters.APOSTROPHE, ApostropheFilterFactory.class) + .put(PreBuiltTokenFilters.ARABIC_NORMALIZATION, ArabicNormalizationFilterFactory.class) + .put(PreBuiltTokenFilters.ARABIC_STEM, ArabicStemFilterFactory.class) + .put(PreBuiltTokenFilters.ASCIIFOLDING, ASCIIFoldingFilterFactory.class) + .put(PreBuiltTokenFilters.BRAZILIAN_STEM, BrazilianStemFilterFactory.class) + .put(PreBuiltTokenFilters.CJK_BIGRAM, org.apache.lucene.analysis.cjk.CJKBigramFilterFactory.class) + .put(PreBuiltTokenFilters.CJK_WIDTH, CJKWidthFilterFactory.class) + .put(PreBuiltTokenFilters.CLASSIC, ClassicFilterFactory.class) + .put(PreBuiltTokenFilters.COMMON_GRAMS, CommonGramsFilterFactory.class) + .put(PreBuiltTokenFilters.CZECH_STEM, CzechStemFilterFactory.class) + .put(PreBuiltTokenFilters.DECIMAL_DIGIT, DecimalDigitFilterFactory.class) + .put(PreBuiltTokenFilters.DELIMITED_PAYLOAD_FILTER, DelimitedPayloadTokenFilterFactory.class) + .put(PreBuiltTokenFilters.DUTCH_STEM, Void.class) // no Lucene factory + .put(PreBuiltTokenFilters.EDGE_NGRAM, EdgeNGramFilterFactory.class) + .put(PreBuiltTokenFilters.ELISION, ElisionFilterFactory.class) + .put(PreBuiltTokenFilters.FRENCH_STEM, Void.class) // no Lucene factory + .put(PreBuiltTokenFilters.GERMAN_NORMALIZATION, GermanNormalizationFilterFactory.class) + .put(PreBuiltTokenFilters.GERMAN_STEM, Void.class) // no Lucene factory + .put(PreBuiltTokenFilters.HINDI_NORMALIZATION, HindiNormalizationFilterFactory.class) + .put(PreBuiltTokenFilters.INDIC_NORMALIZATION, IndicNormalizationFilterFactory.class) + .put(PreBuiltTokenFilters.KEYWORD_REPEAT, KeywordRepeatFilterFactory.class) + .put(PreBuiltTokenFilters.KSTEM, KStemFilterFactory.class) + .put(PreBuiltTokenFilters.LENGTH, LengthFilterFactory.class) + .put(PreBuiltTokenFilters.LIMIT, LimitTokenCountFilterFactory.class) + .put(PreBuiltTokenFilters.LOWERCASE,
LowerCaseFilterFactory.class) + .put(PreBuiltTokenFilters.NGRAM, NGramFilterFactory.class) + .put(PreBuiltTokenFilters.PERSIAN_NORMALIZATION, PersianNormalizationFilterFactory.class) + .put(PreBuiltTokenFilters.PORTER_STEM, PorterStemFilterFactory.class) + .put(PreBuiltTokenFilters.REVERSE, ReverseStringFilterFactory.class) + .put(PreBuiltTokenFilters.RUSSIAN_STEM, Void.class) + .put(PreBuiltTokenFilters.SCANDINAVIAN_FOLDING, ScandinavianFoldingFilterFactory.class) + .put(PreBuiltTokenFilters.SCANDINAVIAN_NORMALIZATION, ScandinavianNormalizationFilterFactory.class) + .put(PreBuiltTokenFilters.SHINGLE, ShingleFilterFactory.class) + .put(PreBuiltTokenFilters.SNOWBALL, Void.class) // no Lucene factory + .put(PreBuiltTokenFilters.SORANI_NORMALIZATION, SoraniNormalizationFilterFactory.class) + .put(PreBuiltTokenFilters.STANDARD, StandardFilterFactory.class) + .put(PreBuiltTokenFilters.STEMMER, PorterStemFilterFactory.class) + .put(PreBuiltTokenFilters.STOP, StopFilterFactory.class) + .put(PreBuiltTokenFilters.TRIM, TrimFilterFactory.class) + .put(PreBuiltTokenFilters.TRUNCATE, TruncateTokenFilterFactory.class) + .put(PreBuiltTokenFilters.TYPE_AS_PAYLOAD, TypeAsPayloadTokenFilterFactory.class) + .put(PreBuiltTokenFilters.UNIQUE, Void.class) // no Lucene factory + .put(PreBuiltTokenFilters.UPPERCASE, UpperCaseFilterFactory.class) + .put(PreBuiltTokenFilters.WORD_DELIMITER, WordDelimiterFilterFactory.class) + .immutableMap(); + + public void testPrebuiltTokenizers() { + for (PreBuiltTokenizers tokenizer : PreBuiltTokenizers.values()) { + Class luceneFactory = KNOWN_TOKENIZERS.get(tokenizer); + assertNotNull("Add " + tokenizer + " to KNOWN_TOKENIZERS", luceneFactory); + if (Void.class.equals(luceneFactory)) { + continue; + } + assertTrue( + "Not a Lucene factory for " + tokenizer, + org.apache.lucene.analysis.util.TokenizerFactory.class.isAssignableFrom(luceneFactory)); + TokenizerFactory factory = tokenizer.getTokenizerFactory(Version.CURRENT); + assertEquals("Wrong 
multi-term behaviour for " + tokenizer, + org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory), + factory instanceof MultiTermAwareComponent); + } + } + + public void testPrebuiltCharFilters() { + for (PreBuiltCharFilters charFilter : PreBuiltCharFilters.values()) { + Class luceneFactory = KNOWN_CHAR_FILTERS.get(charFilter); + assertNotNull("Add " + charFilter + " to KNOWN_CHAR_FILTERS", luceneFactory); + if (Void.class.equals(luceneFactory)) { + continue; + } + assertTrue( + "Not a Lucene factory for " + charFilter, + org.apache.lucene.analysis.util.CharFilterFactory.class.isAssignableFrom(luceneFactory)); + CharFilterFactory factory = charFilter.getCharFilterFactory(Version.CURRENT); + assertEquals("Wrong multi-term behaviour for " + charFilter, + org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory), + factory instanceof MultiTermAwareComponent); + } + } + + public void testPrebuiltTokenFilterFactories() { + for (PreBuiltTokenFilters tokenFilter : PreBuiltTokenFilters.values()) { + Class luceneFactory = KNOWN_TOKEN_FILTERS.get(tokenFilter); + assertNotNull("Add " + tokenFilter + " to KNOWN_TOKEN_FILTERS", luceneFactory); + if (Void.class.equals(luceneFactory)) { + continue; + } + assertTrue( + "Not a Lucene factory for " + tokenFilter, + org.apache.lucene.analysis.util.TokenFilterFactory.class.isAssignableFrom(luceneFactory)); + TokenFilterFactory factory = tokenFilter.getTokenFilterFactory(Version.CURRENT); + assertEquals("Wrong multi-term behaviour for " + tokenFilter, + org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory), + factory instanceof MultiTermAwareComponent); + } + } + } diff --git a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisServiceTests.java b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisServiceTests.java index b72996bd1a1db..2eb39b2d8d5fd 100644 --- 
a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisServiceTests.java +++ b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisServiceTests.java @@ -21,6 +21,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -47,7 +48,7 @@ public class AnalysisServiceTests extends ESTestCase { private static AnalyzerProvider analyzerProvider(final String name) { - return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDEX, new EnglishAnalyzer()); + return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDEX, new EnglishAnalyzer(), new KeywordAnalyzer()); } public void testDefaultAnalyzers() throws IOException { @@ -80,7 +81,8 @@ public void testOverrideDefaultIndexAnalyzer() { Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); try { AnalysisService analysisService = new AnalysisService(IndexSettingsModule.newIndexSettings("index", settings), - Collections.singletonMap("default_index", new PreBuiltAnalyzerProvider("default_index", AnalyzerScope.INDEX, new EnglishAnalyzer())), + Collections.singletonMap("default_index", new PreBuiltAnalyzerProvider("default_index", AnalyzerScope.INDEX, + new EnglishAnalyzer(), new KeywordAnalyzer())), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap()); fail("Expected ISE"); } catch (IllegalArgumentException e) { diff --git a/core/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java b/core/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java index b1dcdec646b2b..4089d8f76b9f0 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java +++ 
b/core/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java @@ -81,7 +81,8 @@ public void testThatAnalyzersAreUsedInMapping() throws IOException { Version randomVersion = randomVersion(random()); Settings indexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, randomVersion).build(); - NamedAnalyzer namedAnalyzer = new PreBuiltAnalyzerProvider(analyzerName, AnalyzerScope.INDEX, randomPreBuiltAnalyzer.getAnalyzer(randomVersion)).get(); + NamedAnalyzer namedAnalyzer = new PreBuiltAnalyzerProvider(analyzerName, AnalyzerScope.INDEX, + randomPreBuiltAnalyzer.getAnalyzer(randomVersion), randomPreBuiltAnalyzer.getAnalyzer(randomVersion)).get(); String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field").field("type", "text").field("analyzer", analyzerName).endObject().endObject() diff --git a/core/src/test/java/org/elasticsearch/index/mapper/DocumentFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/DocumentFieldMapperTests.java index 43fc1d5c82ac8..7d43eade957bd 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/DocumentFieldMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/DocumentFieldMapperTests.java @@ -111,7 +111,9 @@ public void testAnalyzers() throws IOException { FakeFieldType fieldType1 = new FakeFieldType(); fieldType1.setName("field1"); fieldType1.setIndexAnalyzer(new NamedAnalyzer("foo", new FakeAnalyzer("index"))); - fieldType1.setSearchAnalyzer(new NamedAnalyzer("bar", new FakeAnalyzer("search"))); + fieldType1.setSearchAnalyzer( + new NamedAnalyzer("bar", new FakeAnalyzer("search")), + new NamedAnalyzer("bar", new FakeAnalyzer("search"))); fieldType1.setSearchQuoteAnalyzer(new NamedAnalyzer("baz", new FakeAnalyzer("search_quote"))); FieldMapper fieldMapper1 = new FakeFieldMapper("field1", fieldType1); diff --git a/core/src/test/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java 
b/core/src/test/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java index 68e59527982a2..a410e902d09ad 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.index.mapper; +import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -84,17 +85,23 @@ public void normalizeOther(MappedFieldType other) { new Modifier("search_analyzer", true) { @Override public void modify(MappedFieldType ft) { - ft.setSearchAnalyzer(new NamedAnalyzer("bar", new StandardAnalyzer())); + ft.setSearchAnalyzer( + new NamedAnalyzer("bar", new StandardAnalyzer()), + new NamedAnalyzer("bar", new KeywordAnalyzer())); } }, new Modifier("search_analyzer", true) { @Override public void modify(MappedFieldType ft) { - ft.setSearchAnalyzer(new NamedAnalyzer("bar", new StandardAnalyzer())); + ft.setSearchAnalyzer( + new NamedAnalyzer("bar", new StandardAnalyzer()), + new NamedAnalyzer("bar", new KeywordAnalyzer())); } @Override public void normalizeOther(MappedFieldType other) { - other.setSearchAnalyzer(new NamedAnalyzer("foo", new StandardAnalyzer())); + other.setSearchAnalyzer( + new NamedAnalyzer("foo", new StandardAnalyzer()), + new NamedAnalyzer("foo", new KeywordAnalyzer())); } }, new Modifier("search_quote_analyzer", true) { diff --git a/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java index 8a3552cf8f2a0..0866ade6cdc56 100644 --- a/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java @@ -101,9 +101,6 @@ protected 
QueryStringQueryBuilder doCreateTestQueryBuilder() { if (randomBoolean()) { queryStringQueryBuilder.maxDeterminizedStates(randomIntBetween(1, 100)); } - if (randomBoolean()) { - queryStringQueryBuilder.lowercaseExpandedTerms(randomBoolean()); - } if (randomBoolean()) { queryStringQueryBuilder.autoGeneratePhraseQueries(randomBoolean()); } @@ -143,9 +140,6 @@ protected QueryStringQueryBuilder doCreateTestQueryBuilder() { if (randomBoolean()) { queryStringQueryBuilder.useDisMax(randomBoolean()); } - if (randomBoolean()) { - queryStringQueryBuilder.locale(randomLocale(random())); - } if (randomBoolean()) { queryStringQueryBuilder.timeZone(randomDateTimeZone().getID()); } @@ -308,7 +302,7 @@ public void testToQueryWildcarQuery() throws Exception { for (Operator op : Operator.values()) { BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); MapperQueryParser queryParser = new MapperQueryParser(createShardContext()); - QueryParserSettings settings = new QueryParserSettings("first foo-bar-foobar* last"); + QueryParserSettings settings = new QueryParserSettings(); settings.defaultField(STRING_FIELD_NAME); settings.fieldsAndWeights(Collections.emptyMap()); settings.analyzeWildcard(true); @@ -336,14 +330,14 @@ public void testToQueryWilcardQueryWithSynonyms() throws Exception { for (Operator op : Operator.values()) { BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); MapperQueryParser queryParser = new MapperQueryParser(createShardContext()); - QueryParserSettings settings = new QueryParserSettings("first foo-bar-foobar* last"); + QueryParserSettings settings = new QueryParserSettings(); settings.defaultField(STRING_FIELD_NAME); settings.fieldsAndWeights(Collections.emptyMap()); settings.analyzeWildcard(true); settings.fuzziness(Fuzziness.AUTO); settings.rewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE); settings.defaultOperator(op.toQueryParserOperator()); - settings.forceAnalyzer(new MockRepeatAnalyzer()); + settings.analyzer(new MockRepeatAnalyzer(), new 
MockRepeatAnalyzer()); queryParser.reset(settings); Query query = queryParser.parse("first foo-bar-foobar* last"); @@ -522,13 +516,11 @@ public void testFromJson() throws IOException { " \"default_operator\" : \"or\",\n" + " \"auto_generated_phrase_queries\" : false,\n" + " \"max_determined_states\" : 10000,\n" + - " \"lowercase_expanded_terms\" : true,\n" + " \"enable_position_increment\" : true,\n" + " \"fuzziness\" : \"AUTO\",\n" + " \"fuzzy_prefix_length\" : 0,\n" + " \"fuzzy_max_expansions\" : 50,\n" + " \"phrase_slop\" : 0,\n" + - " \"locale\" : \"und\",\n" + " \"escape\" : false,\n" + " \"boost\" : 1.0\n" + " }\n" + diff --git a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryParserTests.java b/core/src/test/java/org/elasticsearch/index/query/SimpleQueryParserTests.java index 8511ad6d9c72e..c2451eadbfb70 100644 --- a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryParserTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/SimpleQueryParserTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; @@ -28,17 +29,22 @@ import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanClause; +import org.elasticsearch.Version; +import org.elasticsearch.indices.analysis.PreBuiltAnalyzers; import org.elasticsearch.test.ESTestCase; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import static org.hamcrest.Matchers.equalTo; public class SimpleQueryParserTests extends ESTestCase { + private static class MockSimpleQueryParser extends SimpleQueryParser { - public MockSimpleQueryParser(Analyzer analyzer, Map weights, int flags, Settings settings) { - super(analyzer, weights, flags, settings, null); 
+ public MockSimpleQueryParser(Analyzer analyzer, Analyzer multiTermAnalyzer, + Map weights, int flags, Settings settings) { + super(analyzer, multiTermAnalyzer, weights, flags, settings, null); } @Override @@ -47,12 +53,20 @@ protected Query newTermQuery(Term term) { } } + public void testPrefixQuery() { + SimpleQueryParser parser = new MockSimpleQueryParser( + PreBuiltAnalyzers.ENGLISH.getAnalyzer(Version.CURRENT), + PreBuiltAnalyzers.ENGLISH.getMultiTermAnalyzer(Version.CURRENT), + Collections.singletonMap("field", 1f), -1, new SimpleQueryParser.Settings()); + assertEquals(new PrefixQuery(new Term("field", "tables")), parser.parse("Tables*")); + } + public void testAnalyzeWildcard() { SimpleQueryParser.Settings settings = new SimpleQueryParser.Settings(); settings.analyzeWildcard(true); Map weights = new HashMap<>(); weights.put("field1", 1.0f); - SimpleQueryParser parser = new MockSimpleQueryParser(new StandardAnalyzer(), weights, -1, settings); + SimpleQueryParser parser = new MockSimpleQueryParser(new StandardAnalyzer(), new KeywordAnalyzer(), weights, -1, settings); for (Operator op : Operator.values()) { BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); parser.setDefaultOperator(defaultOp); @@ -76,7 +90,8 @@ public void testAnalyzerWildcardWithSynonyms() { settings.analyzeWildcard(true); Map weights = new HashMap<>(); weights.put("field1", 1.0f); - SimpleQueryParser parser = new MockSimpleQueryParser(new MockRepeatAnalyzer(), weights, -1, settings); + SimpleQueryParser parser = new MockSimpleQueryParser(new MockRepeatAnalyzer(), new MockRepeatAnalyzer(), + weights, -1, settings); for (Operator op : Operator.values()) { BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); diff --git a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java index 9168b489eb2c0..e578f65f4afce 100644 --- 
a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java @@ -55,12 +55,6 @@ protected SimpleQueryStringBuilder doCreateTestQueryBuilder() { if (randomBoolean()) { result.lenient(randomBoolean()); } - if (randomBoolean()) { - result.lowercaseExpandedTerms(randomBoolean()); - } - if (randomBoolean()) { - result.locale(randomLocale(random())); - } if (randomBoolean()) { result.minimumShouldMatch(randomMinimumShouldMatch()); } @@ -109,26 +103,22 @@ public void testDefaults() { assertEquals("Wrong default default operator field.", Operator.OR, SimpleQueryStringBuilder.DEFAULT_OPERATOR); assertEquals("Wrong default default locale.", Locale.ROOT, qb.locale()); - assertEquals("Wrong default default locale field.", Locale.ROOT, SimpleQueryStringBuilder.DEFAULT_LOCALE); assertEquals("Wrong default default analyze_wildcard.", false, qb.analyzeWildcard()); assertEquals("Wrong default default analyze_wildcard field.", false, SimpleQueryStringBuilder.DEFAULT_ANALYZE_WILDCARD); assertEquals("Wrong default default lowercase_expanded_terms.", true, qb.lowercaseExpandedTerms()); - assertEquals("Wrong default default lowercase_expanded_terms field.", true, - SimpleQueryStringBuilder.DEFAULT_LOWERCASE_EXPANDED_TERMS); assertEquals("Wrong default default lenient.", false, qb.lenient()); assertEquals("Wrong default default lenient field.", false, SimpleQueryStringBuilder.DEFAULT_LENIENT); assertEquals("Wrong default default locale.", Locale.ROOT, qb.locale()); - assertEquals("Wrong default default locale field.", Locale.ROOT, SimpleQueryStringBuilder.DEFAULT_LOCALE); } public void testDefaultNullLocale() { SimpleQueryStringBuilder qb = new SimpleQueryStringBuilder("The quick brown fox."); qb.locale(null); - assertEquals("Setting locale to null should result in returning to default value.", SimpleQueryStringBuilder.DEFAULT_LOCALE, + assertEquals("Setting locale to null 
should result in returning to default value.", Locale.ROOT, qb.locale()); } @@ -349,10 +339,8 @@ public void testFromJson() throws IOException { " \"analyzer\" : \"snowball\",\n" + " \"flags\" : -1,\n" + " \"default_operator\" : \"and\",\n" + - " \"lowercase_expanded_terms\" : true,\n" + " \"lenient\" : false,\n" + " \"analyze_wildcard\" : false,\n" + - " \"locale\" : \"und\",\n" + " \"boost\" : 1.0\n" + " }\n" + "}"; diff --git a/core/src/test/java/org/elasticsearch/indices/analysis/DummyAnalyzerProvider.java b/core/src/test/java/org/elasticsearch/indices/analysis/DummyAnalyzerProvider.java index 68beb817d700b..6f9b177928d93 100644 --- a/core/src/test/java/org/elasticsearch/indices/analysis/DummyAnalyzerProvider.java +++ b/core/src/test/java/org/elasticsearch/indices/analysis/DummyAnalyzerProvider.java @@ -19,10 +19,11 @@ package org.elasticsearch.indices.analysis; +import org.apache.lucene.analysis.Analyzer; import org.elasticsearch.index.analysis.AnalyzerProvider; import org.elasticsearch.index.analysis.AnalyzerScope; -public class DummyAnalyzerProvider implements AnalyzerProvider { +public class DummyAnalyzerProvider implements AnalyzerProvider { @Override public String name() { return "dummy"; @@ -37,4 +38,9 @@ public AnalyzerScope scope() { public DummyAnalyzer get() { return new DummyAnalyzer(); } + + @Override + public Analyzer getMultiTerm() { + return get(); + } } diff --git a/core/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java b/core/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java index 8cb21dadb9b04..51e8fc9b07452 100644 --- a/core/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/core/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -496,24 +496,111 @@ public void testQueryStringAnalyzedWildcard() throws Exception { assertHitCount(searchResponse, 1L); } - public void testLowercaseExpandedTerms() { - createIndex("test"); - - client().prepareIndex("test", "type1", 
"1").setSource("field1", "value_1", "field2", "value_2").get(); + public void testExpandedTerms() throws IOException { + Settings indexSettings = Settings.builder() + .put("index.analysis.analyzer.uppercase.type", "custom") + .put("index.analysis.analyzer.uppercase.tokenizer", "standard") + .put("index.analysis.analyzer.uppercase.filter", "uppercase") + .build(); + String mapping = jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "text") + .startObject("fields") + .startObject("upper") + .field("type", "text") + .field("analyzer", "uppercase") + .endObject() + .startObject("keyword_text") + .field("type", "text") + .field("analyzer", "keyword") + .endObject() + .startObject("keyword") + .field("type", "keyword") + .endObject() + .endObject() + .endObject() + .endObject().endObject().endObject().string(); + assertAcked(prepareCreate("test").setSettings(indexSettings).addMapping("type", mapping)); + + client().prepareIndex("test", "type", "1").setSource("field", "value_1").get(); refresh(); - SearchResponse searchResponse = client().prepareSearch().setQuery(queryStringQuery("VALUE_3~1").lowercaseExpandedTerms(true)).get(); + // fuzzy + SearchResponse searchResponse = client().prepareSearch().setQuery(queryStringQuery("value_3~1").field("field")).get(); assertHitCount(searchResponse, 1L); - searchResponse = client().prepareSearch().setQuery(queryStringQuery("VALUE_3~1").lowercaseExpandedTerms(false)).get(); - assertHitCount(searchResponse, 0L); - searchResponse = client().prepareSearch().setQuery(queryStringQuery("ValUE_*").lowercaseExpandedTerms(true)).get(); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("value_3~1").field("field.upper")).get(); assertHitCount(searchResponse, 1L); - searchResponse = client().prepareSearch().setQuery(queryStringQuery("vAl*E_1")).get(); + searchResponse = 
client().prepareSearch().setQuery(queryStringQuery("value_3~1").field("field.keyword_text")).get(); assertHitCount(searchResponse, 1L); - searchResponse = client().prepareSearch().setQuery(queryStringQuery("[VALUE_1 TO VALUE_3]")).get(); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("value_3~1").field("field.keyword")).get(); assertHitCount(searchResponse, 1L); - searchResponse = client().prepareSearch().setQuery(queryStringQuery("[VALUE_1 TO VALUE_3]").lowercaseExpandedTerms(false)).get(); - assertHitCount(searchResponse, 0L); + + searchResponse = client().prepareSearch().setQuery(queryStringQuery("VALUE_3~1").field("field")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("VALUE_3~1").field("field.upper")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("VALUE_3~1").field("field.keyword_text")).get(); + assertHitCount(searchResponse, 0L); // does not match case + searchResponse = client().prepareSearch().setQuery(queryStringQuery("VALUE_3~1").field("field.keyword")).get(); + assertHitCount(searchResponse, 0L); // does not match case + + // prefix + searchResponse = client().prepareSearch().setQuery(queryStringQuery("value_*").field("field")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("value_*").field("field.upper")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("value_*").field("field.keyword_text")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("value_*").field("field.keyword")).get(); + assertHitCount(searchResponse, 1L); + + searchResponse = client().prepareSearch().setQuery(queryStringQuery("VaLuE_*").field("field")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = 
client().prepareSearch().setQuery(queryStringQuery("VaLuE_*").field("field.upper")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("VaLuE_*").field("field.keyword_text")).get(); + assertHitCount(searchResponse, 0L); // does not match case + searchResponse = client().prepareSearch().setQuery(queryStringQuery("VaLuE_*").field("field.keyword")).get(); + assertHitCount(searchResponse, 0L); // does not match case + + // wildcard + searchResponse = client().prepareSearch().setQuery(queryStringQuery("va*ue_*").field("field")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("va*ue_*").field("field.upper")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("va*ue_*").field("field.keyword_text")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("va*ue_*").field("field.keyword")).get(); + assertHitCount(searchResponse, 1L); + + searchResponse = client().prepareSearch().setQuery(queryStringQuery("Va*uE_*").field("field")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("Va*uE_*").field("field.upper")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("Va*uE_*").field("field.keyword_text")).get(); + assertHitCount(searchResponse, 0L); // does not match case + searchResponse = client().prepareSearch().setQuery(queryStringQuery("Va*uE_*").field("field.keyword")).get(); + assertHitCount(searchResponse, 0L); // does not match case + + // range + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[value_1 TO value_3]").field("field")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[value_1 TO 
value_3]").field("field.upper")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[value_1 TO value_3]").field("field.keyword_text")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[value_1 TO value_3]").field("field.keyword")).get(); + assertHitCount(searchResponse, 1L); + + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[vaLue_1 TO vaLue_3]").field("field")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[vaLue_1 TO vaLue_3]").field("field.upper")).get(); + assertHitCount(searchResponse, 1L); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[vaLue_1 TO vaLue_3]").field("field.keyword_text")).get(); + assertHitCount(searchResponse, 0L); // does not match case + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[vaLue_1 TO vaLue_3]").field("field.keyword")).get(); + assertHitCount(searchResponse, 0L); // does not match case } // Issue #3540 diff --git a/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java b/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java index 9502a8183152e..30c0c6db9d6e5 100644 --- a/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java +++ b/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java @@ -30,12 +30,10 @@ import org.elasticsearch.test.ESIntegTestCase; import java.io.IOException; -import java.util.Locale; import java.util.concurrent.ExecutionException; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.index.query.QueryBuilders.boolQuery; -import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery; import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery; import static 
org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -167,38 +165,10 @@ public void testSimpleQueryStringLowercasing() { assertHitCount(searchResponse, 1L); assertSearchHits(searchResponse, "1"); - searchResponse = client().prepareSearch().setQuery( - simpleQueryStringQuery("Professio*").lowercaseExpandedTerms(false)).get(); - assertHitCount(searchResponse, 0L); - searchResponse = client().prepareSearch().setQuery( simpleQueryStringQuery("Professionan~1")).get(); assertHitCount(searchResponse, 1L); assertSearchHits(searchResponse, "1"); - - searchResponse = client().prepareSearch().setQuery( - simpleQueryStringQuery("Professionan~1").lowercaseExpandedTerms(false)).get(); - assertHitCount(searchResponse, 0L); - } - - public void testQueryStringLocale() { - createIndex("test"); - client().prepareIndex("test", "type1", "1").setSource("body", "bılly").get(); - refresh(); - - SearchResponse searchResponse = client().prepareSearch().setQuery(simpleQueryStringQuery("BILL*")).get(); - assertHitCount(searchResponse, 0L); - searchResponse = client().prepareSearch().setQuery(queryStringQuery("body:BILL*")).get(); - assertHitCount(searchResponse, 0L); - - searchResponse = client().prepareSearch().setQuery( - simpleQueryStringQuery("BILL*").locale(new Locale("tr", "TR"))).get(); - assertHitCount(searchResponse, 1L); - assertSearchHits(searchResponse, "1"); - searchResponse = client().prepareSearch().setQuery( - queryStringQuery("body:BILL*").locale(new Locale("tr", "TR"))).get(); - assertHitCount(searchResponse, 1L); - assertSearchHits(searchResponse, "1"); } public void testNestedFieldSimpleQueryString() throws IOException { @@ -342,7 +312,7 @@ public void testSimpleQueryStringAnalyzeWildcard() throws ExecutionException, In refresh(); SearchResponse searchResponse = client().prepareSearch() - .setQuery(simpleQueryStringQuery("Köln*").analyzeWildcard(true).field("location")).get(); + 
.setQuery(simpleQueryStringQuery("Köln*").field("location")).get(); assertNoFailures(searchResponse); assertHitCount(searchResponse, 1L); assertSearchHits(searchResponse, "1"); diff --git a/docs/reference/query-dsl/query-string-query.asciidoc b/docs/reference/query-dsl/query-string-query.asciidoc index 60477d6e28a1a..7cb18ca3a8d42 100644 --- a/docs/reference/query-dsl/query-string-query.asciidoc +++ b/docs/reference/query-dsl/query-string-query.asciidoc @@ -40,10 +40,6 @@ with default operator of `AND`, the same query is translated to |`allow_leading_wildcard` |When set, `*` or `?` are allowed as the first character. Defaults to `true`. -|`lowercase_expanded_terms` |Whether terms of wildcard, prefix, fuzzy, -and range queries are to be automatically lower-cased or not (since they -are not analyzed). Default it `true`. - |`enable_position_increments` |Set to `true` to enable position increments in result queries. Defaults to `true`. @@ -61,9 +57,9 @@ phrase matches are required. Default value is `0`. |`boost` |Sets the boost value of the query. Defaults to `1.0`. -|`analyze_wildcard` |By default, wildcards terms in a query string are -not analyzed. By setting this value to `true`, a best effort will be -made to analyze those as well. +|`analyze_wildcard` |By default, only the char filters and token filters that +make sense are applied to wildcarded terms (eg. `lowercase` but not stemmers). +By setting this value to `true`, the whole analysis chain will be applied. |`auto_generate_phrase_queries` |Defaults to `false`. @@ -80,9 +76,6 @@ both>>. |`lenient` |If set to `true` will cause format based failures (like providing text to a numeric field) to be ignored. -|`locale` | Locale that should be used for string conversions. -Defaults to `ROOT`. - |`time_zone` | Time Zone to be applied to any range query related to dates. See also http://www.joda.org/joda-time/apidocs/org/joda/time/DateTimeZone.html[JODA timezone].
|======================================================================= diff --git a/docs/reference/query-dsl/query-string-syntax.asciidoc b/docs/reference/query-dsl/query-string-syntax.asciidoc index 6755b9e9efe82..e8bba7e37ee18 100644 --- a/docs/reference/query-dsl/query-string-syntax.asciidoc +++ b/docs/reference/query-dsl/query-string-syntax.asciidoc @@ -65,12 +65,10 @@ they match. Leading wildcards can be disabled by setting `allow_leading_wildcard` to `false`. ======= -Wildcarded terms are not analyzed by default -- they are lowercased -(`lowercase_expanded_terms` defaults to `true`) but no further analysis -is done, mainly because it is impossible to accurately analyze a word that -is missing some of its letters. However, by setting `analyze_wildcard` to -`true`, an attempt will be made to analyze wildcarded words before searching -the term list for matching terms. +By default, only the char filters and token filters that make sense are +applied to wildcarded terms (eg. `lowercase` but not stemmers). It is +possible to apply the whole analysis chain by setting `analyze_wildcard` to +`true`. ===== Regular expressions diff --git a/docs/reference/query-dsl/simple-query-string-query.asciidoc b/docs/reference/query-dsl/simple-query-string-query.asciidoc index 796f2517fea6c..036174dddf446 100644 --- a/docs/reference/query-dsl/simple-query-string-query.asciidoc +++ b/docs/reference/query-dsl/simple-query-string-query.asciidoc @@ -44,17 +44,10 @@ creating composite queries. |`flags` |Flags specifying which features of the `simple_query_string` to enable. Defaults to `ALL`. -|`lowercase_expanded_terms` | Whether terms of prefix and fuzzy queries should -be automatically lower-cased or not (since they are not analyzed). Defaults to -`true`. - -|`analyze_wildcard` | Whether terms of prefix queries should be automatically -analyzed or not. If `true` a best effort will be made to analyze the prefix. 
However, -some analyzers will be not able to provide a meaningful results -based just on the prefix of a term. Defaults to `false`. - -|`locale` | Locale that should be used for string conversions. -Defaults to `ROOT`. +|`analyze_wildcard` | Whether terms of prefix queries should be tokenized +or not. If `false` (default), only the char filters and token filters that +make sense (eg. `lowercase` but not stemmers) will be applied. It is possible +to set it to `true` in order to apply the whole analysis chain. |`lenient` | If set to `true` will cause format based failures (like providing text to a numeric field) to be ignored. diff --git a/docs/reference/search/count.asciidoc b/docs/reference/search/count.asciidoc index 859455e89b72c..3a20eed2214a8 100644 --- a/docs/reference/search/count.asciidoc +++ b/docs/reference/search/count.asciidoc @@ -74,10 +74,7 @@ query. |`lenient` |If set to true will cause format based failures (like providing text to a numeric field) to be ignored. Defaults to false. -|`lowercase_expanded_terms` |Should terms be automatically lowercased or -not. Defaults to `true`. - -|`analyze_wildcard` |Should wildcard and prefix queries be analyzed or +|`analyze_wildcard` |Should wildcard and prefix queries be tokenized or not. Defaults to `false`. |`terminate_after` |The maximum count for each shard, upon diff --git a/docs/reference/search/explain.asciidoc b/docs/reference/search/explain.asciidoc index 125f3124bff4d..fd173e3304f3f 100644 --- a/docs/reference/search/explain.asciidoc +++ b/docs/reference/search/explain.asciidoc @@ -96,13 +96,9 @@ This will yield the same result as the previous request. string. Defaults to the analyzer of the _all field. `analyze_wildcard`:: - Should wildcard and prefix queries be analyzed or + Should wildcard and prefix queries be tokenized or not. Defaults to false. -`lowercase_expanded_terms`:: - Should terms be automatically lowercased - or not. Defaults to true. 
- `lenient`:: If set to true will cause format based failures (like providing text to a numeric field) to be ignored. Defaults to false. diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc index ba36992f6fbb9..3bba1e3772b69 100644 --- a/docs/reference/search/uri-request.asciidoc +++ b/docs/reference/search/uri-request.asciidoc @@ -64,10 +64,7 @@ query. |`analyzer` |The analyzer name to be used when analyzing the query string. -|`lowercase_expanded_terms` |Should terms be automatically lowercased or -not. Defaults to `true`. - -|`analyze_wildcard` |Should wildcard and prefix queries be analyzed or +|`analyze_wildcard` |Should wildcard and prefix queries be tokenized or not. Defaults to `false`. |`default_operator` |The default operator to be used, can be `AND` or diff --git a/docs/reference/search/validate.asciidoc b/docs/reference/search/validate.asciidoc index 8a2730c0148d2..f95374053859b 100644 --- a/docs/reference/search/validate.asciidoc +++ b/docs/reference/search/validate.asciidoc @@ -52,10 +52,7 @@ query. |`lenient` |If set to true will cause format based failures (like providing text to a numeric field) to be ignored. Defaults to false. -|`lowercase_expanded_terms` |Should terms be automatically lowercased or -not. Defaults to `true`. - -|`analyze_wildcard` |Should wildcard and prefix queries be analyzed or +|`analyze_wildcard` |Should wildcard and prefix queries be tokenized or not. Defaults to `false`. 
|======================================================================= diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java index 21d9b8040550a..ce5b9b218508b 100644 --- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java +++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java @@ -31,7 +31,7 @@ /** */ -public class KuromojiAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class KuromojiAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final JapaneseAnalyzer analyzer; diff --git a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseAnalyzerProvider.java b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseAnalyzerProvider.java index 591912b8fa38f..d7ed36f3235d6 100644 --- a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseAnalyzerProvider.java +++ b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseAnalyzerProvider.java @@ -26,7 +26,7 @@ /** */ -public class SmartChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class SmartChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final SmartChineseAnalyzer analyzer; diff --git a/plugins/analysis-stempel/src/main/java/org/elasticsearch/index/analysis/pl/PolishAnalyzerProvider.java b/plugins/analysis-stempel/src/main/java/org/elasticsearch/index/analysis/pl/PolishAnalyzerProvider.java index d80939cea0451..ebc5cfae5699e 100644 --- a/plugins/analysis-stempel/src/main/java/org/elasticsearch/index/analysis/pl/PolishAnalyzerProvider.java +++ b/plugins/analysis-stempel/src/main/java/org/elasticsearch/index/analysis/pl/PolishAnalyzerProvider.java @@ -27,7 
+27,7 @@ /** */ -public class PolishAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class PolishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final PolishAnalyzer analyzer; diff --git a/plugins/mapper-size/src/main/java/org/elasticsearch/index/mapper/size/SizeFieldMapper.java b/plugins/mapper-size/src/main/java/org/elasticsearch/index/mapper/size/SizeFieldMapper.java index 9c07c7b3eb33d..00432c6c9d70a 100644 --- a/plugins/mapper-size/src/main/java/org/elasticsearch/index/mapper/size/SizeFieldMapper.java +++ b/plugins/mapper-size/src/main/java/org/elasticsearch/index/mapper/size/SizeFieldMapper.java @@ -55,15 +55,13 @@ public static class Defaults { static { SIZE_FIELD_TYPE.setStored(true); SIZE_FIELD_TYPE.setName(NAME); - SIZE_FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - SIZE_FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); SIZE_FIELD_TYPE.freeze(); LEGACY_SIZE_FIELD_TYPE.setStored(true); LEGACY_SIZE_FIELD_TYPE.setNumericPrecisionStep(LegacyIntegerFieldMapper.Defaults.PRECISION_STEP_32_BIT); LEGACY_SIZE_FIELD_TYPE.setName(NAME); LEGACY_SIZE_FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - LEGACY_SIZE_FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + LEGACY_SIZE_FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); LEGACY_SIZE_FIELD_TYPE.freeze(); } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/count/20_query_string.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/count/20_query_string.yaml index 933033761e9c1..70f402691a379 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/count/20_query_string.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/count/20_query_string.yaml @@ -58,15 +58,6 @@ count: index: test q: field:BA* - lowercase_expanded_terms: false - - - match: {count : 0} - - - do: - count: - index: test - q: field:BA* - analyze_wildcard: true - match: {count : 1} diff --git 
a/rest-api-spec/src/main/resources/rest-api-spec/test/explain/30_query_string.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/explain/30_query_string.yaml index 30fe6cc55b621..78ef8c4bc8942 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/explain/30_query_string.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/explain/30_query_string.yaml @@ -68,17 +68,6 @@ type: test id: 1 q: field:BA* - lowercase_expanded_terms: false - - - is_false: matched - - - do: - explain: - index: test - type: test - id: 1 - q: field:BA* - analyze_wildcard: true - is_true: matched diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/60_query_string.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/60_query_string.yaml index 6fb93bb10449a..8533cfd26686d 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/60_query_string.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/60_query_string.yaml @@ -58,15 +58,6 @@ search: index: test q: field:BA* - lowercase_expanded_terms: false - - - match: {hits.total: 0} - - - do: - search: - index: test - q: field:BA* - analyze_wildcard: true - match: {hits.total: 1}