diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc index 28e4ba71779f2..0f1799fc3cf36 100644 --- a/docs/reference/api-conventions.asciidoc +++ b/docs/reference/api-conventions.asciidoc @@ -122,6 +122,22 @@ fields within a document indexed treated as boolean fields. All REST APIs support providing numbered parameters as `string` on top of supporting the native JSON number types. +[[time-units]] +[float] +=== Time units + +Whenever durations need to be specified, eg for a `timeout` parameter, the duration +can be specified as a whole number representing time in milliseconds, or as a time value like `2d` for 2 days. The supported units are: + +[horizontal] +`y`:: Year +`M`:: Month +`w`:: Week +`d`:: Day +`h`:: Hour +`m`:: Minute +`s`:: Second + [[distance-units]] [float] === Distance Units @@ -144,6 +160,63 @@ Centimeter:: `cm` or `centimeters` Millimeter:: `mm` or `millimeters` +[[fuzziness]] +[float] +=== Fuzziness + +Some queries and APIs support parameters to allow inexact _fuzzy_ matching, +using the `fuzziness` parameter. The `fuzziness` parameter is context +sensitive which means that it depends on the type of the field being queried: + +[float] +==== Numeric, date and IPv4 fields + +When querying numeric, date and IPv4 fields, `fuzziness` is interpreted as a +`+/-` margin. It behaves like a <<query-dsl-range-query>> where: + + -fuzziness <= field value <= +fuzziness + +The `fuzziness` parameter should be set to a numeric value, eg `2` or `2.0`. A +`date` field interprets a long as milliseconds, but also accepts a string +containing a time value -- `"1h"` -- as explained in <<time-units>>. An `ip` +field accepts a long or another IPv4 address (which will be converted into a +long). 
+ +[float] +==== String fields + +When querying `string` fields, `fuzziness` is interpreted as a +http://en.wikipedia.org/wiki/Levenshtein_distance[Levenshtein Edit Distance] +-- the number of one-character changes that need to be made to one string to +make it the same as another string. + +The `fuzziness` parameter can be specified as: + +`0`, `1`, `2`:: + +the maximum allowed Levenshtein Edit Distance (or number of edits) + +`AUTO`:: ++ +-- +generates an edit distance based on the length of the term. For lengths: + +`0..2`:: must match exactly +`3..5`:: one edit allowed +`>5`:: two edits allowed + +`AUTO` should generally be the preferred value for `fuzziness`. +-- + +`0.0..1.0`:: + +converted into an edit distance using the formula: `length(term) * (1.0 - +fuzziness)`, eg a `fuzziness` of `0.6` with a term of length 10 would result +in an edit distance of `4`. Note: in all APIs except for the +<<query-dsl-flt-query>>, the maximum allowed edit distance is `2`. + + + [float] === Result Casing diff --git a/docs/reference/query-dsl/queries/flt-field-query.asciidoc b/docs/reference/query-dsl/queries/flt-field-query.asciidoc index 734983c338976..205dc61307d55 100644 --- a/docs/reference/query-dsl/queries/flt-field-query.asciidoc +++ b/docs/reference/query-dsl/queries/flt-field-query.asciidoc @@ -33,8 +33,8 @@ The `fuzzy_like_this_field` top level parameters include: |`max_query_terms` |The maximum number of query terms that will be included in any generated query. Defaults to `25`. -|`min_similarity` |The minimum similarity of the term variants. Defaults -to `0.5`. +|`fuzziness` |The fuzziness of the term variants. Defaults +to `0.5`. See <<fuzziness>>. |`prefix_length` |Length of required common prefix on variant terms. Defaults to `0`. 
diff --git a/docs/reference/query-dsl/queries/flt-query.asciidoc b/docs/reference/query-dsl/queries/flt-query.asciidoc index beb49ea9367ab..231de6b6c048d 100644 --- a/docs/reference/query-dsl/queries/flt-query.asciidoc +++ b/docs/reference/query-dsl/queries/flt-query.asciidoc @@ -32,8 +32,8 @@ Defaults to the `_all` field. |`max_query_terms` |The maximum number of query terms that will be included in any generated query. Defaults to `25`. -|`min_similarity` |The minimum similarity of the term variants. Defaults -to `0.5`. +|`fuzziness` |The fuzziness of the term variants. Defaults +to `0.5`. See <<fuzziness>>. |`prefix_length` |Length of required common prefix on variant terms. Defaults to `0`. diff --git a/docs/reference/query-dsl/queries/fuzzy-query.asciidoc b/docs/reference/query-dsl/queries/fuzzy-query.asciidoc index 86a1062d16922..082f3f1a123f7 100644 --- a/docs/reference/query-dsl/queries/fuzzy-query.asciidoc +++ b/docs/reference/query-dsl/queries/fuzzy-query.asciidoc @@ -1,12 +1,15 @@ [[query-dsl-fuzzy-query]] === Fuzzy Query -A fuzzy query that uses similarity based on Levenshtein (edit -distance) algorithm. This maps to Lucene's `FuzzyQuery`. +The fuzzy query uses similarity based on Levenshtein edit distance for +`string` fields, and a `+/-` margin on numeric and date fields. -Warning: this query is not very scalable with its default prefix length -of 0 - in this case, *every* term will be enumerated and cause an edit -score calculation or `max_expansions` is not set. +==== String fields + +The `fuzzy` query generates all possible matching terms that are within the +maximum edit distance specified in `fuzziness` and then checks the term +dictionary to find out which of those generated terms actually exist in the +index. 
Here is a simple example: @@ -17,31 +20,57 @@ Here is a simple example: } -------------------------------------------------- -More complex settings can be set (the values here are the default -values): +Or with more advanced settings: [source,js] -------------------------------------------------- - { - "fuzzy" : { - "user" : { - "value" : "ki", - "boost" : 1.0, - "min_similarity" : 0.5, - "prefix_length" : 0 - } +{ + "fuzzy" : { + "user" : { + "value" : "ki", + "boost" : 1.0, + "fuzziness" : 2, + "prefix_length" : 0, + "max_expansions": 100 } } +} -------------------------------------------------- -The `max_expansions` parameter (unbounded by default) controls the -number of terms the fuzzy query will expand to. +[float] +===== Parameters + +[horizontal] +`fuzziness`:: + + The maximum edit distance. Defaults to `AUTO`. See <<fuzziness>>. + +`prefix_length`:: + + The number of initial characters which will not be ``fuzzified''. This + helps to reduce the number of terms which must be examined. Defaults + to `0`. + +`max_expansions`:: + + The maximum number of terms that the `fuzzy` query will expand to. + Defaults to `0`. + + +WARNING: this query can be very heavy if `prefix_length` and `max_expansions` +are both set to their defaults of `0`. This could cause every term in the +index to be examined! + [float] -==== Numeric / Date Fuzzy +==== Numeric and date fields + +Performs a <<query-dsl-range-query>> ``around'' the value using the +`fuzziness` value as a `+/-` range, where: + + -fuzziness <= field value <= +fuzziness -`fuzzy` query on a numeric field will result in a range query "around" -the value using the `min_similarity` value. For example: +For example: [source,js] -------------------------------------------------- @@ -49,14 +78,14 @@ the value using the `min_similarity` value. For example: "fuzzy" : { "price" : { "value" : 12, - "min_similarity" : 2 + "fuzziness" : 2 } } } -------------------------------------------------- -Will result in a range query between 10 and 14. 
Same applies to dates, -with support for time format for the `min_similarity` field: +Will result in a range query between 10 and 14. Date fields support +<>, eg: [source,js] -------------------------------------------------- @@ -64,16 +93,10 @@ with support for time format for the `min_similarity` field: "fuzzy" : { "created" : { "value" : "2010-02-05T12:05:07", - "min_similarity" : "1d" + "fuzziness" : "1d" } } } -------------------------------------------------- -In the mapping, numeric and date types now allow to configure a -`fuzzy_factor` mapping value (defaults to 1), which will be used to -multiply the fuzzy value by it when used in a `query_string` type query. -For example, for dates, a fuzzy factor of "1d" will result in -multiplying whatever fuzzy value provided in the min_similarity by it. -Note, this is explicitly supported since query_string query only allowed -for similarity valued between 0.0 and 1.0. +See <> for more details about accepted values. diff --git a/docs/reference/query-dsl/queries/match-query.asciidoc b/docs/reference/query-dsl/queries/match-query.asciidoc index 5460cbff1e448..2bf8c8dff7853 100644 --- a/docs/reference/query-dsl/queries/match-query.asciidoc +++ b/docs/reference/query-dsl/queries/match-query.asciidoc @@ -34,9 +34,10 @@ The `analyzer` can be set to control which analyzer will perform the analysis process on the text. It default to the field explicit mapping definition, or the default search analyzer. -`fuzziness` can be set to a value (depending on the relevant type, for -string types it should be a value between `0.0` and `1.0`) to constructs -fuzzy queries for each term analyzed. The `prefix_length` and +`fuzziness` allows _fuzzy matching_ based on the type of field being queried. +See <> for allowed settings. + +The `prefix_length` and `max_expansions` can be set in this case to control the fuzzy process. If the fuzzy option is set the query will use `constant_score_rewrite` as its <> for allowed settings. 
|`fuzzy_prefix_length` |Set the prefix length for fuzzy queries. Default is `0`. @@ -70,7 +70,7 @@ in the resulting boolean query should match. It can be an absolute value both>>. |`lenient` |If set to `true` will cause format based failures (like -providing text to a numeric field) to be ignored. +providing text to a numeric field) to be ignored. |======================================================================= When a multi term query is being generated, one can control how it gets @@ -128,7 +128,7 @@ search on all "city" fields: Another option is to provide the wildcard fields search in the query string itself (properly escaping the `*` sign), for example: -`city.\*:something`. +`city.\*:something`. When running the `query_string` query against multiple fields, the following additional parameters are allowed: diff --git a/docs/reference/search/suggesters/completion-suggest.asciidoc b/docs/reference/search/suggesters/completion-suggest.asciidoc index da1b7fc2f339e..7c672db4418ab 100644 --- a/docs/reference/search/suggesters/completion-suggest.asciidoc +++ b/docs/reference/search/suggesters/completion-suggest.asciidoc @@ -199,7 +199,7 @@ curl -X POST 'localhost:9200/music/_suggest?pretty' -d '{ "completion" : { "field" : "suggest", "fuzzy" : { - "edit_distance" : 2 + "fuzziness" : 2 } } } @@ -210,8 +210,9 @@ The fuzzy query can take specific fuzzy parameters. The following parameters are supported: [horizontal] -`edit_distance`:: - Maximum edit distance, defaults to `1` +`fuzziness`:: + The fuzziness factor, defaults to `AUTO`. + See <> for allowed settings. 
`transpositions`:: Sets if transpositions should be counted diff --git a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index c007a5922f54d..75e2af0f909da 100644 --- a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -30,6 +30,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.lucene.search.XFilteredQuery; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.QueryParseContext; @@ -435,7 +436,7 @@ private Query getFuzzyQuerySingle(String field, String termStr, String minSimila if (currentMapper != null) { try { //LUCENE 4 UPGRADE I disabled transpositions here by default - maybe this needs to be changed - Query fuzzyQuery = currentMapper.fuzzyQuery(termStr, minSimilarity, fuzzyPrefixLength, settings.fuzzyMaxExpansions(), false); + Query fuzzyQuery = currentMapper.fuzzyQuery(termStr, Fuzziness.build(minSimilarity), fuzzyPrefixLength, settings.fuzzyMaxExpansions(), false); return wrapSmartNameQuery(fuzzyQuery, fieldMappers, parseContext); } catch (RuntimeException e) { if (settings.lenient()) { diff --git a/src/main/java/org/elasticsearch/common/ParseField.java b/src/main/java/org/elasticsearch/common/ParseField.java new file mode 100644 index 0000000000000..6e89e747890b9 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/ParseField.java @@ -0,0 +1,74 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.common; + +import org.elasticsearch.ElasticsearchIllegalArgumentException; + +import java.util.EnumSet; +import java.util.HashSet; + +/** A field name matched in both its camelCase and underscore_case spelling, plus optional deprecated names that still match + * unless {@link Flag#STRICT} is passed to {@link #match}, in which case using one throws ElasticsearchIllegalArgumentException. */ +public class ParseField { + private final String camelCaseName; + private final String underscoreName; + private final String[] deprecatedNames; + + public static final EnumSet<Flag> EMPTY_FLAGS = EnumSet.noneOf(Flag.class); + + public static enum Flag { + STRICT + } + + public ParseField(String value, String... deprecatedNames) { + camelCaseName = Strings.toCamelCase(value); + underscoreName = Strings.toUnderscoreCase(value); + if (deprecatedNames == null || deprecatedNames.length == 0) { + this.deprecatedNames = Strings.EMPTY_ARRAY; + } else { + final HashSet<String> set = new HashSet<String>(); + for (String depName : deprecatedNames) { + set.add(Strings.toCamelCase(depName)); + set.add(Strings.toUnderscoreCase(depName)); + } + this.deprecatedNames = set.toArray(new String[0]); + } + } + + public ParseField withDeprecation(String... 
deprecatedNames) { + return new ParseField(this.underscoreName, deprecatedNames); + } + + public boolean match(String currentFieldName, EnumSet<Flag> flags) { + if (currentFieldName.equals(camelCaseName) || currentFieldName.equals(underscoreName)) { + return true; + } + for (String depName : deprecatedNames) { + if (currentFieldName.equals(depName)) { + if (flags.contains(Flag.STRICT)) { + throw new ElasticsearchIllegalArgumentException("Deprecated field [" + currentFieldName + "] used expected [" + underscoreName + "] instead"); + } + return true; + } + } + return false; + } + + +} diff --git a/src/main/java/org/elasticsearch/common/unit/Fuzziness.java b/src/main/java/org/elasticsearch/common/unit/Fuzziness.java new file mode 100644 index 0000000000000..712b37abcf906 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/unit/Fuzziness.java @@ -0,0 +1,256 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.common.unit; + +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.util.automaton.LevenshteinAutomata; +import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Preconditions; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; + +/** + * A unit class that encapsulates all inexact search + * parsing and conversion from similarities to edit distances + * etc. + */ +public final class Fuzziness implements ToXContent { + + public static final XContentBuilderString X_FIELD_NAME = new XContentBuilderString("fuzziness"); + public static final Fuzziness ZERO = new Fuzziness(0); + public static final Fuzziness ONE = new Fuzziness(1); + public static final Fuzziness TWO = new Fuzziness(2); + public static final Fuzziness AUTO = new Fuzziness("AUTO"); + public static final ParseField FIELD = new ParseField(X_FIELD_NAME.camelCase().getValue()); + + private final Object fuzziness; + + private Fuzziness(int fuzziness) { + Preconditions.checkArgument(fuzziness >= 0 && fuzziness <= 2, "Valid edit distances are [0, 1, 2] but was [" + fuzziness + "]"); + this.fuzziness = fuzziness; + } + + private Fuzziness(float fuzziness) { + Preconditions.checkArgument(fuzziness >= 0.0 && fuzziness < 1.0f, "Valid similarities must be in the interval [0..1) but was [" + fuzziness + "]"); + this.fuzziness = fuzziness; + } + + private Fuzziness(String fuzziness) { + this.fuzziness = fuzziness; + } + + /** + * Creates a {@link Fuzziness} instance from a similarity. 
The value must be in the range [0..1) + */ + public static Fuzziness fromSimilarity(float similarity) { + return new Fuzziness(similarity); + } + + /** + * Creates a {@link Fuzziness} instance from an edit distance. The value must be one of [0, 1, 2] + */ + public static Fuzziness fromEdits(int edits) { + return new Fuzziness(edits); + } + + public static Fuzziness build(Object fuzziness) { + if (fuzziness instanceof Fuzziness) { + return (Fuzziness) fuzziness; + } + String string = fuzziness.toString(); + if (AUTO.asString().equalsIgnoreCase(string)) { + return AUTO; + } + return new Fuzziness(string); + } + + public static Fuzziness parse(XContentParser parser) throws IOException { + XContentParser.Token token = parser.currentToken(); + switch (token) { + case VALUE_STRING: + case VALUE_NUMBER: + final String fuzziness = parser.text(); + if (AUTO.asString().equalsIgnoreCase(fuzziness)) { + return AUTO; + } + try { + final int minimumSimilarity = Integer.parseInt(fuzziness); + switch (minimumSimilarity) { + case 0: + return ZERO; + case 1: + return ONE; + case 2: + return TWO; + default: + return build(fuzziness); + } + } catch (NumberFormatException ex) { + return build(fuzziness); + } + + default: + throw new ElasticsearchIllegalArgumentException("Can't parse fuzziness on token: [" + token + "]"); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return toXContent(builder, params, true); + } + + public XContentBuilder toXContent(XContentBuilder builder, Params params, boolean includeFieldName) throws IOException { + if (includeFieldName) { + builder.field(X_FIELD_NAME, fuzziness); + } else { + builder.value(fuzziness); + } + return builder; + } + + public int asDistance() { + return asDistance(null); + } + + public int asDistance(String text) { + if (fuzziness instanceof String) { + if (this == AUTO) { //AUTO + final int len = termLen(text); + if (len <= 2) { + return 0; + } else if (len > 5) 
{ + return 2; + } else { + return 1; + } + } + } + return FuzzyQuery.floatToEdits(asFloat(), termLen(text)); + } + + public TimeValue asTimeValue() { + if (this == AUTO) { + return TimeValue.timeValueMillis(1); + } else { + return TimeValue.parseTimeValue(fuzziness.toString(), null); + } + } + + public long asLong() { + if (this == AUTO) { + return 1; + } + try { + return Long.parseLong(fuzziness.toString()); + } catch (NumberFormatException ex) { + return (long) Double.parseDouble(fuzziness.toString()); + } + } + + public int asInt() { + if (this == AUTO) { + return 1; + } + try { + return Integer.parseInt(fuzziness.toString()); + } catch (NumberFormatException ex) { + return (int) Float.parseFloat(fuzziness.toString()); + } + } + + public short asShort() { + if (this == AUTO) { + return 1; + } + try { + return Short.parseShort(fuzziness.toString()); + } catch (NumberFormatException ex) { + return (short) Float.parseFloat(fuzziness.toString()); + } + } + + public byte asByte() { + if (this == AUTO) { + return 1; + } + try { + return Byte.parseByte(fuzziness.toString()); + } catch (NumberFormatException ex) { + return (byte) Float.parseFloat(fuzziness.toString()); + } + } + + public double asDouble() { + if (this == AUTO) { + return 1d; + } + return Double.parseDouble(fuzziness.toString()); + } + + public float asFloat() { + if (this == AUTO) { + return 1f; + } + return Float.parseFloat(fuzziness.toString()); + } + + public float asSimilarity() { + return asSimilarity(null); + } + + public float asSimilarity(String text) { + if (this == AUTO) { + final int len = termLen(text); + if (len <= 2) { + return 0.0f; + } else if (len > 5) { + return 0.5f; + } else { + return 0.66f; + } +// return dist == 0 ? dist : Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist/ (float) termLen(text)))); + } + if (fuzziness instanceof Float) { // it's a similarity + return ((Float) fuzziness).floatValue(); + } else if (fuzziness instanceof Integer) { // it's an edit! 
+ int dist = Math.min(((Integer) fuzziness).intValue(), + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); + return Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist / (float) termLen(text)))); + } else { + final float similarity = Float.parseFloat(fuzziness.toString()); + if (similarity >= 0.0f && similarity < 1.0f) { + return similarity; + } + } + throw new ElasticsearchIllegalArgumentException("Can't get similarity from fuzziness [" + fuzziness + "]"); + } + + private int termLen(String text) { + return text == null ? 5 : text.codePointCount(0, text.length()); // 5 avg term length in english + } + + public String asString() { + return fuzziness.toString(); + } +} diff --git a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 6b9abf19628e2..181053fb58e12 100644 --- a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider; import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; import org.elasticsearch.index.fielddata.FieldDataType; @@ -214,7 +215,7 @@ public static Loading parse(String loading, Loading defaultValue) { Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context); - Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions); + Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions); Query prefixQuery(Object value, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context); diff --git 
a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java index 06631f8160fb0..51b2f07823dbc 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java @@ -37,6 +37,7 @@ import org.elasticsearch.common.lucene.search.RegexpFilter; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider; @@ -466,9 +467,8 @@ public Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLow } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { - int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSim), value.codePointCount(0, value.length())); - return new FuzzyQuery(names.createIndexNameTerm(indexedValueForSearch(value)), edits, prefixLength, maxExpansions, transpositions); + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { + return new FuzzyQuery(names.createIndexNameTerm(indexedValueForSearch(value)), fuzziness.asDistance(value), prefixLength, maxExpansions, transpositions); } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java index a1e92c87b9298..6ceeada7414d7 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java @@ -34,6 +34,7 @@ import org.elasticsearch.common.Nullable; import 
org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -181,14 +182,9 @@ private int parseValueAsInt(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { byte iValue = Byte.parseByte(value); - byte iSim; - try { - iSim = Byte.parseByte(minSim); - } catch (NumberFormatException e) { - iSim = (byte) Float.parseFloat(minSim); - } + byte iSim = fuzziness.asByte(); return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java index bc8847f84d2a4..81d1a09cf1561 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java @@ -36,7 +36,7 @@ import org.elasticsearch.common.joda.FormatDateTimeFormatter; import org.elasticsearch.common.joda.Joda; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -291,14 +291,14 @@ private String convertToString(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int 
prefixLength, int maxExpansions, boolean transpositions) { long iValue = dateMathParser.parse(value, System.currentTimeMillis()); long iSim; try { - iSim = TimeValue.parseTimeValue(minSim, null).millis(); + iSim = fuzziness.asTimeValue().millis(); } catch (Exception e) { // not a time format - iSim = (long) Double.parseDouble(minSim); + iSim = fuzziness.asLong(); } return NumericRangeQuery.newLongRange(names.indexName(), precisionStep, iValue - iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java index 57cc74db92d9f..4f5e38b3dcbe7 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java @@ -36,6 +36,7 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Numbers; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -171,9 +172,9 @@ public BytesRef indexedValueForSearch(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { double iValue = Double.parseDouble(value); - double iSim = Double.parseDouble(minSim); + double iSim = fuzziness.asDouble(); return NumericRangeQuery.newDoubleRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java index ceb11c9cf6920..203b1eb0ba659 100644 --- 
a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java @@ -37,6 +37,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -181,9 +182,9 @@ private float parseValue(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { float iValue = Float.parseFloat(value); - float iSim = Float.parseFloat(minSim); + final float iSim = fuzziness.asFloat(); return NumericRangeQuery.newFloatRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java index 0c76b8bc316a8..c999bdfce132a 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericIntegerAnalyzer; @@ -176,14 +177,9 @@ private int parseValue(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public 
Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { int iValue = Integer.parseInt(value); - int iSim; - try { - iSim = Integer.parseInt(minSim); - } catch (NumberFormatException e) { - iSim = (int) Float.parseFloat(minSim); - } + int iSim = fuzziness.asInt(); return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java index ad7ec404568d6..639fcd7f343b3 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericLongAnalyzer; @@ -165,14 +166,9 @@ public BytesRef indexedValueForSearch(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { long iValue = Long.parseLong(value); - long iSim; - try { - iSim = Long.parseLong(minSim); - } catch (NumberFormatException e) { - iSim = (long) Double.parseDouble(minSim); - } + final long iSim = fuzziness.asLong(); return NumericRangeQuery.newLongRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java index ec6af5644f5a6..7fc7f96bb0772 
100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java @@ -39,6 +39,7 @@ import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -239,7 +240,7 @@ public Filter termFilter(Object value, @Nullable QueryParseContext context) { public abstract Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context); @Override - public abstract Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions); + public abstract Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions); /** * A range filter based on the field data cache. 
diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java index 4820f248250b1..1d75ec0984366 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -180,14 +181,9 @@ private int parseValueAsInt(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { short iValue = Short.parseShort(value); - short iSim; - try { - iSim = Short.parseShort(minSim); - } catch (NumberFormatException e) { - iSim = (short) Float.parseFloat(minSim); - } + short iSim = fuzziness.asShort(); return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java index 294fd7f6d2355..32b3a50efad6b 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericFloatAnalyzer; @@ -183,9 +184,9 @@ private float parseValue(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { float iValue = Float.parseFloat(value); - float iSim = Float.parseFloat(minSim); + float iSim = fuzziness.asFloat(); return NumericRangeQuery.newFloatRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java index bfc5ea088cf16..f7ad916219072 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java @@ -34,6 +34,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -216,17 +217,13 @@ private long parseValue(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { long iValue = ipToLong(value); long iSim; try { - iSim = ipToLong(minSim); + iSim = ipToLong(fuzziness.asString()); } catch (ElasticsearchIllegalArgumentException e) { - try { - iSim = Long.parseLong(minSim); - } catch (NumberFormatException e1) { - iSim = (long) 
Double.parseDouble(minSim); - } + iSim = fuzziness.asLong(); } return NumericRangeQuery.newLongRange(names.indexName(), precisionStep, iValue - iSim, diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryBuilder.java index e0d007e1195c1..f9846d0044bce 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -34,7 +35,7 @@ public class FuzzyLikeThisFieldQueryBuilder extends BaseQueryBuilder implements private Float boost; private String likeText = null; - private Float minSimilarity; + private Fuzziness fuzziness; private Integer prefixLength; private Integer maxQueryTerms; private Boolean ignoreTF; @@ -59,8 +60,8 @@ public FuzzyLikeThisFieldQueryBuilder likeText(String likeText) { return this; } - public FuzzyLikeThisFieldQueryBuilder minSimilarity(float minSimilarity) { - this.minSimilarity = minSimilarity; + public FuzzyLikeThisFieldQueryBuilder fuzziness(Fuzziness fuzziness) { + this.fuzziness = fuzziness; return this; } @@ -119,8 +120,8 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep if (maxQueryTerms != null) { builder.field("max_query_terms", maxQueryTerms); } - if (minSimilarity != null) { - builder.field("min_similarity", minSimilarity); + if (fuzziness != null) { + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java 
b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java index b2fcc39fc0b5b..8088281553d68 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java @@ -23,8 +23,10 @@ import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.Analysis; import org.elasticsearch.index.mapper.MapperService; @@ -48,6 +50,8 @@ public class FuzzyLikeThisFieldQueryParser implements QueryParser { public static final String NAME = "flt_field"; + private static final Fuzziness DEFAULT_FUZZINESS = Fuzziness.fromSimilarity(0.5f); + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("min_similarity"); @Inject public FuzzyLikeThisFieldQueryParser() { @@ -65,7 +69,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars int maxNumTerms = 25; float boost = 1.0f; String likeText = null; - float minSimilarity = 0.5f; + Fuzziness fuzziness = DEFAULT_FUZZINESS; int prefixLength = 0; boolean ignoreTF = false; Analyzer analyzer = null; @@ -98,8 +102,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars boost = parser.floatValue(); } else if ("ignore_tf".equals(currentFieldName) || "ignoreTF".equals(currentFieldName)) { ignoreTF = parser.booleanValue(); - } else if ("min_similarity".equals(currentFieldName) || "minSimilarity".equals(currentFieldName)) { - minSimilarity = parser.floatValue(); + } else if (FUZZINESS.match(currentFieldName, parseContext.parseFlags())) { + fuzziness = 
Fuzziness.parse(parser); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { prefixLength = parser.intValue(); } else if ("analyzer".equals(currentFieldName)) { @@ -139,7 +143,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars } FuzzyLikeThisQuery fuzzyLikeThisQuery = new FuzzyLikeThisQuery(maxNumTerms, analyzer); - fuzzyLikeThisQuery.addTerms(likeText, fieldName, minSimilarity, prefixLength); + fuzzyLikeThisQuery.addTerms(likeText, fieldName, fuzziness.asSimilarity(), prefixLength); fuzzyLikeThisQuery.setBoost(boost); fuzzyLikeThisQuery.setIgnoreTF(ignoreTF); @@ -156,4 +160,4 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars } return query; } -} \ No newline at end of file +} diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryBuilder.java index 7b0b20af13156..fafe60e1edb52 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -34,7 +35,7 @@ public class FuzzyLikeThisQueryBuilder extends BaseQueryBuilder implements Boost private Float boost; private String likeText = null; - private Float minSimilarity; + private Fuzziness fuzziness; private Integer prefixLength; private Integer maxQueryTerms; private Boolean ignoreTF; @@ -66,8 +67,8 @@ public FuzzyLikeThisQueryBuilder likeText(String likeText) { return this; } - public FuzzyLikeThisQueryBuilder minSimilarity(float minSimilarity) { - this.minSimilarity = minSimilarity; + public FuzzyLikeThisQueryBuilder fuzziness(Fuzziness fuzziness) { 
+ this.fuzziness = fuzziness; return this; } @@ -132,8 +133,8 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep if (maxQueryTerms != null) { builder.field("max_query_terms", maxQueryTerms); } - if (minSimilarity != null) { - builder.field("min_similarity", minSimilarity); + if (fuzziness != null) { + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java index 73754cfb6aceb..610a136b3920d 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java @@ -24,7 +24,9 @@ import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.Analysis; @@ -47,6 +49,7 @@ public class FuzzyLikeThisQueryParser implements QueryParser { public static final String NAME = "flt"; + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("min_similarity"); @Inject public FuzzyLikeThisQueryParser() { @@ -65,7 +68,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars float boost = 1.0f; List fields = null; String likeText = null; - float minSimilarity = 0.5f; + Fuzziness fuzziness = Fuzziness.TWO; int prefixLength = 0; boolean ignoreTF = false; Analyzer analyzer = null; @@ -86,8 +89,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars boost = parser.floatValue(); } else if ("ignore_tf".equals(currentFieldName) || 
"ignoreTF".equals(currentFieldName)) { ignoreTF = parser.booleanValue(); - } else if ("min_similarity".equals(currentFieldName) || "minSimilarity".equals(currentFieldName)) { - minSimilarity = parser.floatValue(); + } else if (FUZZINESS.match(currentFieldName, parseContext.parseFlags())) { + fuzziness = Fuzziness.parse(parser); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { prefixLength = parser.intValue(); } else if ("analyzer".equals(currentFieldName)) { @@ -139,7 +142,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars return null; } for (String field : fields) { - query.addTerms(likeText, field, minSimilarity, prefixLength); + query.addTerms(likeText, field, fuzziness.asSimilarity(), prefixLength); } query.setBoost(boost); query.setIgnoreTF(ignoreTF); diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java index c675c169befdb..ab158fb1acc68 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.query; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -36,7 +37,7 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements MultiTermQuer private float boost = -1; - private String minSimilarity; + private Fuzziness fuzziness; private Integer prefixLength; @@ -67,13 +68,8 @@ public FuzzyQueryBuilder boost(float boost) { return this; } - public FuzzyQueryBuilder minSimilarity(float defaultMinSimilarity) { - this.minSimilarity = Float.toString(defaultMinSimilarity); - return this; - } - - public FuzzyQueryBuilder minSimilarity(String defaultMinSimilarity) { - this.minSimilarity = defaultMinSimilarity; + public FuzzyQueryBuilder fuzziness(Fuzziness 
fuzziness) { + this.fuzziness = fuzziness; return this; } @@ -103,7 +99,7 @@ public FuzzyQueryBuilder queryName(String queryName) { @Override public void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(FuzzyQueryParser.NAME); - if (boost == -1 && minSimilarity == null && prefixLength == null && queryName != null) { + if (boost == -1 && fuzziness == null && prefixLength == null && queryName != null) { builder.field(name, value); } else { builder.startObject(name); @@ -114,8 +110,8 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio if (transpositions != null) { builder.field("transpositions", transpositions); } - if (minSimilarity != null) { - builder.field("min_similarity", minSimilarity); + if (fuzziness != null) { + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java index cf0d540cd9dc8..1d13c3ca06f9b 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java @@ -23,7 +23,9 @@ import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.support.QueryParsers; @@ -38,6 +40,9 @@ public class FuzzyQueryParser implements QueryParser { public static final String NAME = "fuzzy"; + private static final Fuzziness DEFAULT_FUZZINESS = Fuzziness.AUTO; + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("min_similarity"); + 
@Inject public FuzzyQueryParser() { @@ -60,8 +65,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars String value = null; float boost = 1.0f; - //LUCENE 4 UPGRADE we should find a good default here I'd vote for 1.0 -> 1 edit - String minSimilarity = "0.5"; + Fuzziness fuzziness = DEFAULT_FUZZINESS; int prefixLength = FuzzyQuery.defaultPrefixLength; int maxExpansions = FuzzyQuery.defaultMaxExpansions; boolean transpositions = false; @@ -80,8 +84,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars value = parser.text(); } else if ("boost".equals(currentFieldName)) { boost = parser.floatValue(); - } else if ("min_similarity".equals(currentFieldName) || "minSimilarity".equals(currentFieldName)) { - minSimilarity = parser.text(); + } else if (FUZZINESS.match(currentFieldName, parseContext.parseFlags())) { + fuzziness = Fuzziness.parse(parser); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { prefixLength = parser.intValue(); } else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) { @@ -112,14 +116,11 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { - query = smartNameFieldMappers.mapper().fuzzyQuery(value, minSimilarity, prefixLength, maxExpansions, transpositions); + query = smartNameFieldMappers.mapper().fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions); } } if (query == null) { - //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float - int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSimilarity), - value.codePointCount(0, value.length())); - query = new FuzzyQuery(new Term(fieldName, value), edits, prefixLength, maxExpansions, 
transpositions); + query = new FuzzyQuery(new Term(fieldName, value), fuzziness.asDistance(value), prefixLength, maxExpansions, transpositions); } if (query instanceof MultiTermQuery) { QueryParsers.setRewriteMethod((MultiTermQuery) query, rewriteMethod); diff --git a/src/main/java/org/elasticsearch/index/query/IndexQueryParserService.java b/src/main/java/org/elasticsearch/index/query/IndexQueryParserService.java index afb099d24fdbc..3701f1a0eb4ab 100644 --- a/src/main/java/org/elasticsearch/index/query/IndexQueryParserService.java +++ b/src/main/java/org/elasticsearch/index/query/IndexQueryParserService.java @@ -26,6 +26,7 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.cache.recycler.CacheRecycler; import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.Queries; @@ -47,6 +48,7 @@ import org.elasticsearch.script.ScriptService; import java.io.IOException; +import java.util.EnumSet; import java.util.List; import java.util.Map; @@ -93,6 +95,7 @@ protected QueryParseContext initialValue() { private String defaultField; private boolean queryStringLenient; + private final boolean strict; @Inject public IndexQueryParserService(Index index, @IndexSettings Settings indexSettings, @@ -114,6 +117,7 @@ public IndexQueryParserService(Index index, @IndexSettings Settings indexSetting this.defaultField = indexSettings.get("index.query.default_field", AllFieldMapper.NAME); this.queryStringLenient = indexSettings.getAsBoolean("index.query_string.lenient", false); + this.strict = indexSettings.getAsBoolean("index.query.parse.strict", false); List queryParsers = newArrayList(); if (namedQueryParsers != null) { @@ -311,6 +315,9 @@ public ParsedQuery parseQuery(BytesReference source) { private ParsedQuery parse(QueryParseContext parseContext, XContentParser parser) throws 
IOException, QueryParsingException { parseContext.reset(parser); + if (strict) { + parseContext.parseFlags(EnumSet.of(ParseField.Flag.STRICT)); + } Query query = parseContext.parseInnerQuery(); if (query == null) { query = Queries.newMatchNoDocsQuery(); diff --git a/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java index 93ce032b2e0d8..ba985089eaf9c 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.query; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -69,7 +70,7 @@ public static enum ZeroTermsQuery { private Integer slop; - private String fuzziness; + private Fuzziness fuzziness; private Integer prefixLength; @@ -82,11 +83,11 @@ public static enum ZeroTermsQuery { private String fuzzyRewrite = null; private Boolean lenient; - + private Boolean fuzzyTranspositions = null; private ZeroTermsQuery zeroTermsQuery; - + private Float cutoff_Frequency = null; private String queryName; @@ -141,10 +142,10 @@ public MatchQueryBuilder slop(int slop) { } /** - * Sets the minimum similarity used when evaluated to a fuzzy query type. Defaults to "0.5". + * Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". 
*/ public MatchQueryBuilder fuzziness(Object fuzziness) { - this.fuzziness = fuzziness.toString(); + this.fuzziness = Fuzziness.build(fuzziness); return this; } @@ -161,7 +162,7 @@ public MatchQueryBuilder maxExpansions(int maxExpansions) { this.maxExpansions = maxExpansions; return this; } - + /** * Set a cutoff value in [0..1] (or absolute number >=1) representing the * maximum threshold of a terms document frequency to be considered a low @@ -186,7 +187,7 @@ public MatchQueryBuilder fuzzyRewrite(String fuzzyRewrite) { this.fuzzyRewrite = fuzzyRewrite; return this; } - + public MatchQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { //LUCENE 4 UPGRADE add documentation this.fuzzyTranspositions = fuzzyTranspositions; @@ -236,7 +237,7 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio builder.field("slop", slop); } if (fuzziness != null) { - builder.field("fuzziness", fuzziness); + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); @@ -269,7 +270,7 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio if (queryName != null) { builder.field("_name", queryName); } - + builder.endObject(); builder.endObject(); diff --git a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java index 4708b24e24c80..ba8f8536154ec 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java @@ -25,6 +25,7 @@ import org.apache.lucene.search.Query; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.search.MatchQuery; @@ -102,8 +103,8 @@ public 
Query parse(QueryParseContext parseContext) throws IOException, QueryPars boost = parser.floatValue(); } else if ("slop".equals(currentFieldName) || "phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) { matchQuery.setPhraseSlop(parser.intValue()); - } else if ("fuzziness".equals(currentFieldName)) { - matchQuery.setFuzziness(parser.textOrNull()); + } else if (Fuzziness.FIELD.match(currentFieldName, parseContext.parseFlags())) { + matchQuery.setFuzziness(Fuzziness.parse(parser)); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { matchQuery.setFuzzyPrefixLength(parser.intValue()); } else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) { diff --git a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index b11ac311969c3..19c8177576cb0 100644 --- a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -21,6 +21,7 @@ import com.carrotsearch.hppc.ObjectFloatOpenHashMap; import com.google.common.collect.Lists; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -48,7 +49,7 @@ public class MultiMatchQueryBuilder extends BaseQueryBuilder implements Boostabl private Integer slop; - private String fuzziness; + private Fuzziness fuzziness; private Integer prefixLength; @@ -143,10 +144,10 @@ public MultiMatchQueryBuilder slop(int slop) { } /** - * Sets the minimum similarity used when evaluated to a fuzzy query type. Defaults to "0.5". + * Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". 
*/ public MultiMatchQueryBuilder fuzziness(Object fuzziness) { - this.fuzziness = fuzziness.toString(); + this.fuzziness = Fuzziness.build(fuzziness); return this; } @@ -252,7 +253,7 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio builder.field("slop", slop); } if (fuzziness != null) { - builder.field("fuzziness", fuzziness); + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); diff --git a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java index 7a3ead68a8a14..68cdedc7a1d16 100644 --- a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java @@ -24,6 +24,7 @@ import org.apache.lucene.search.Query; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.search.MatchQuery; @@ -99,8 +100,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars boost = parser.floatValue(); } else if ("slop".equals(currentFieldName) || "phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) { multiMatchQuery.setPhraseSlop(parser.intValue()); - } else if ("fuzziness".equals(currentFieldName)) { - multiMatchQuery.setFuzziness(parser.textOrNull()); + } else if (Fuzziness.FIELD.match(currentFieldName, parseContext.parseFlags())) { + multiMatchQuery.setFuzziness(Fuzziness.parse(parser)); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { multiMatchQuery.setFuzzyPrefixLength(parser.intValue()); } else if ("max_expansions".equals(currentFieldName) || 
"maxExpansions".equals(currentFieldName)) { diff --git a/src/main/java/org/elasticsearch/index/query/QueryParseContext.java b/src/main/java/org/elasticsearch/index/query/QueryParseContext.java index f0860db4c1b5e..43eb4d86c2da6 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryParseContext.java +++ b/src/main/java/org/elasticsearch/index/query/QueryParseContext.java @@ -29,6 +29,7 @@ import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.cache.recycler.CacheRecycler; import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.Index; import org.elasticsearch.index.analysis.AnalysisService; @@ -45,10 +46,7 @@ import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * @@ -85,12 +83,24 @@ public static void removeTypes() { private XContentParser parser; + private EnumSet parseFlags = ParseField.EMPTY_FLAGS; + + public QueryParseContext(Index index, IndexQueryParserService indexQueryParser) { this.index = index; this.indexQueryParser = indexQueryParser; } + public void parseFlags(EnumSet parseFlags) { + this.parseFlags = parseFlags == null ? 
ParseField.EMPTY_FLAGS : parseFlags; + } + + public EnumSet parseFlags() { + return parseFlags; + } + public void reset(XContentParser jp) { + this.parseFlags = ParseField.EMPTY_FLAGS; this.lookup = null; this.parser = jp; this.namedFilters.clear(); diff --git a/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java index 7d99d41ff1d84..b01ba1e590580 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import com.carrotsearch.hppc.ObjectFloatOpenHashMap; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -35,7 +36,6 @@ * (using {@link #field(String)}), will run the parsed query against the provided fields, and combine * them either using DisMax or a plain boolean query (see {@link #useDisMax(boolean)}). *

- * (shay.baon) */ public class QueryStringQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder { @@ -68,7 +68,7 @@ public static enum Operator { private float boost = -1; - private float fuzzyMinSim = -1; + private Fuzziness fuzziness; private int fuzzyPrefixLength = -1; private int fuzzyMaxExpansions = -1; private String fuzzyRewrite; @@ -226,15 +226,15 @@ public QueryStringQueryBuilder enablePositionIncrements(boolean enablePositionIn } /** - * Set the minimum similarity for fuzzy queries. Default is 0.5f. + * Set the edit distance for fuzzy queries. Default is "AUTO". */ - public QueryStringQueryBuilder fuzzyMinSim(float fuzzyMinSim) { - this.fuzzyMinSim = fuzzyMinSim; + public QueryStringQueryBuilder fuzziness(Fuzziness fuzziness) { + this.fuzziness = fuzziness; return this; } /** - * Set the minimum similarity for fuzzy queries. Default is 0.5f. + * Set the minimum prefix length for fuzzy queries. Default is 1. */ public QueryStringQueryBuilder fuzzyPrefixLength(int fuzzyPrefixLength) { this.fuzzyPrefixLength = fuzzyPrefixLength; @@ -356,8 +356,8 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep if (enablePositionIncrements != null) { builder.field("enable_position_increments", enablePositionIncrements); } - if (fuzzyMinSim != -1) { - builder.field("fuzzy_min_sim", fuzzyMinSim); + if (fuzziness != null) { + fuzziness.toXContent(builder, params); } if (boost != -1) { builder.field("boost", boost); diff --git a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java index 00f2aeed6fcd3..6d3dfc4d245aa 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java @@ -25,11 +25,13 @@ import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.BooleanQuery; import 
org.apache.lucene.search.Query; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.query.support.QueryParsers; @@ -45,6 +47,7 @@ public class QueryStringQueryParser implements QueryParser { public static final String NAME = "query_string"; + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("fuzzy_min_sim"); private final boolean defaultAnalyzeWildcard; private final boolean defaultAllowLeadingWildcard; @@ -167,8 +170,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull())); } else if ("phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) { qpSettings.phraseSlop(parser.intValue()); - } else if ("fuzzy_min_sim".equals(currentFieldName) || "fuzzyMinSim".equals(currentFieldName)) { - qpSettings.fuzzyMinSim(parser.floatValue()); + } else if (FUZZINESS.match(currentFieldName, parseContext.parseFlags())) { + qpSettings.fuzzyMinSim(Fuzziness.parse(parser).asSimilarity()); } else if ("boost".equals(currentFieldName)) { qpSettings.boost(parser.floatValue()); } else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) { diff --git a/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 51cce04f87c0b..bc5e9e3a45ea5 100644 --- a/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -35,6 +35,7 @@ import 
org.elasticsearch.common.Nullable; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.QueryParseContext; @@ -69,7 +70,7 @@ public static enum ZeroTermsQuery { protected int phraseSlop = 0; - protected String fuzziness = null; + protected Fuzziness fuzziness = null; protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; @@ -112,7 +113,7 @@ public void setPhraseSlop(int phraseSlop) { this.phraseSlop = phraseSlop; } - public void setFuzziness(String fuzziness) { + public void setFuzziness(Fuzziness fuzziness) { this.fuzziness = fuzziness; } @@ -365,10 +366,7 @@ private Query newTermQuery(@Nullable FieldMapper mapper, Term term) { QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod); } } - String text = term.text(); - //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float - int edits = FuzzyQuery.floatToEdits(Float.parseFloat(fuzziness), - text.codePointCount(0, text.length())); + int edits = fuzziness.asDistance(term.text()); FuzzyQuery query = new FuzzyQuery(term, edits, fuzzyPrefixLength, maxExpansions, transpositions); QueryParsers.setRewriteMethod(query, rewriteMethod); return query; diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java index cbee27b2915bf..2be279c59e23e 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java @@ -19,6 +19,8 @@ package org.elasticsearch.search.suggest.completion; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import 
org.elasticsearch.common.ParseField; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.suggest.SuggestContextParser; @@ -34,6 +36,7 @@ public class CompletionSuggestParser implements SuggestContextParser { private CompletionSuggester completionSuggester; + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("edit_distance"); public CompletionSuggestParser(CompletionSuggester completionSuggester) { this.completionSuggester = completionSuggester; @@ -60,8 +63,8 @@ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, Ma if (token == XContentParser.Token.FIELD_NAME) { fuzzyConfigName = parser.currentName(); } else if (token.isValue()) { - if ("edit_distance".equals(fuzzyConfigName) || "editDistance".equals(fuzzyConfigName)) { - suggestion.setFuzzyEditDistance(parser.intValue()); + if (FUZZINESS.match(fuzzyConfigName, ParseField.EMPTY_FLAGS)) { + suggestion.setFuzzyEditDistance(Fuzziness.parse(parser).asDistance()); } else if ("transpositions".equals(fuzzyConfigName)) { suggestion.setFuzzyTranspositions(parser.booleanValue()); } else if ("min_length".equals(fuzzyConfigName) || "minLength".equals(fuzzyConfigName)) { diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java index 2c67d2e5f96e6..2059a28d0ae73 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.suggest.completion; import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester; +import org.elasticsearch.common.unit.Fuzziness; import 
org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.search.suggest.SuggestBuilder; @@ -34,18 +35,18 @@ public CompletionSuggestionFuzzyBuilder(String name) { super(name, "completion"); } - private int fuzzyEditDistance = XFuzzySuggester.DEFAULT_MAX_EDITS; + private Fuzziness fuzziness = Fuzziness.ONE; private boolean fuzzyTranspositions = XFuzzySuggester.DEFAULT_TRANSPOSITIONS; private int fuzzyMinLength = XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH; private int fuzzyPrefixLength = XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX; private boolean unicodeAware = XFuzzySuggester.DEFAULT_UNICODE_AWARE; - public int getFuzzyEditDistance() { - return fuzzyEditDistance; + public Fuzziness getFuzziness() { + return fuzziness; } - public CompletionSuggestionFuzzyBuilder setFuzzyEditDistance(int fuzzyEditDistance) { - this.fuzzyEditDistance = fuzzyEditDistance; + public CompletionSuggestionFuzzyBuilder setFuzziness(Fuzziness fuzziness) { + this.fuzziness = fuzziness; return this; } @@ -89,8 +90,8 @@ public CompletionSuggestionFuzzyBuilder setUnicodeAware(boolean unicodeAware) { protected XContentBuilder innerToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { builder.startObject("fuzzy"); - if (fuzzyEditDistance != XFuzzySuggester.DEFAULT_MAX_EDITS) { - builder.field("edit_distance", fuzzyEditDistance); + if (fuzziness != Fuzziness.ONE) { + fuzziness.toXContent(builder, params); } if (fuzzyTranspositions != XFuzzySuggester.DEFAULT_TRANSPOSITIONS) { builder.field("transpositions", fuzzyTranspositions); diff --git a/src/test/java/org/elasticsearch/common/ParseFieldTests.java b/src/test/java/org/elasticsearch/common/ParseFieldTests.java new file mode 100644 index 0000000000000..d1dca3a482446 --- /dev/null +++ b/src/test/java/org/elasticsearch/common/ParseFieldTests.java @@ -0,0 +1,74 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.common; + +import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.test.ElasticsearchTestCase; + +import java.util.EnumSet; + +import static org.hamcrest.CoreMatchers.*; + +public class ParseFieldTests extends ElasticsearchTestCase { + + public void testParse() { + String[] values = new String[]{"foo_bar", "fooBar"}; + ParseField field = new ParseField(randomFrom(values)); + String[] deprecated = new String[]{"barFoo", "bar_foo"}; + ParseField withDepredcations = field.withDeprecation("Foobar", randomFrom(deprecated)); + assertThat(field, not(sameInstance(withDepredcations))); + assertThat(field.match(randomFrom(values), ParseField.EMPTY_FLAGS), is(true)); + assertThat(field.match("foo bar", ParseField.EMPTY_FLAGS), is(false)); + assertThat(field.match(randomFrom(deprecated), ParseField.EMPTY_FLAGS), is(false)); + assertThat(field.match("barFoo", ParseField.EMPTY_FLAGS), is(false)); + + + assertThat(withDepredcations.match(randomFrom(values), ParseField.EMPTY_FLAGS), is(true)); + assertThat(withDepredcations.match("foo bar", ParseField.EMPTY_FLAGS), is(false)); + assertThat(withDepredcations.match(randomFrom(deprecated), ParseField.EMPTY_FLAGS), is(true)); + assertThat(withDepredcations.match("barFoo", 
ParseField.EMPTY_FLAGS), is(true)); + + // now with strict mode + EnumSet flags = EnumSet.of(ParseField.Flag.STRICT); + assertThat(field.match(randomFrom(values), flags), is(true)); + assertThat(field.match("foo bar", flags), is(false)); + assertThat(field.match(randomFrom(deprecated), flags), is(false)); + assertThat(field.match("barFoo", flags), is(false)); + + + assertThat(withDepredcations.match(randomFrom(values), flags), is(true)); + assertThat(withDepredcations.match("foo bar", flags), is(false)); + try { + withDepredcations.match(randomFrom(deprecated), flags); + fail(); + } catch (ElasticsearchIllegalArgumentException ex) { + + } + + try { + withDepredcations.match("barFoo", flags); + fail(); + } catch (ElasticsearchIllegalArgumentException ex) { + + } + + + } + +} diff --git a/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java b/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java new file mode 100644 index 0000000000000..448d0522efb04 --- /dev/null +++ b/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java @@ -0,0 +1,199 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.common.unit; + +import org.elasticsearch.common.xcontent.XContent; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.test.ElasticsearchTestCase; +import org.junit.Test; + +import java.io.IOException; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.number.IsCloseTo.closeTo; + +public class FuzzinessTests extends ElasticsearchTestCase { + + @Test + public void testNumerics() { + String[] options = new String[]{"1.0", "1", "1.000000"}; + assertThat(Fuzziness.build(randomFrom(options)).asByte(), equalTo((byte) 1)); + assertThat(Fuzziness.build(randomFrom(options)).asInt(), equalTo(1)); + assertThat(Fuzziness.build(randomFrom(options)).asFloat(), equalTo(1f)); + assertThat(Fuzziness.build(randomFrom(options)).asDouble(), equalTo(1d)); + assertThat(Fuzziness.build(randomFrom(options)).asLong(), equalTo(1l)); + assertThat(Fuzziness.build(randomFrom(options)).asShort(), equalTo((short) 1)); + } + + @Test + public void testParseFromXContent() throws IOException { + final int iters = atLeast(10); + for (int i = 0; i < iters; i++) { + { + XContent xcontent = XContentType.JSON.xContent(); + float floatValue = randomFloat(); + String json = jsonBuilder().startObject() + .field(Fuzziness.X_FIELD_NAME, floatValue) + .endObject().string(); + XContentParser parser = xcontent.createParser(json); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + Fuzziness parse = Fuzziness.parse(parser); + assertThat(parse.asFloat(), equalTo(floatValue)); + assertThat(parse.asDouble(), closeTo((double) floatValue, 0.000001)); + assertThat(parser.nextToken(), 
equalTo(XContentParser.Token.END_OBJECT)); + } + + { + XContent xcontent = XContentType.JSON.xContent(); + Integer intValue = frequently() ? randomIntBetween(0, 2) : randomIntBetween(0, 100); + Float floatRep = randomFloat(); + Number value = intValue; + if (randomBoolean()) { + value = new Float(floatRep += intValue); + } + String json = jsonBuilder().startObject() + .field(Fuzziness.X_FIELD_NAME, randomBoolean() ? value.toString() : value) + .endObject().string(); + XContentParser parser = xcontent.createParser(json); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(parser.nextToken(), anyOf(equalTo(XContentParser.Token.VALUE_NUMBER), equalTo(XContentParser.Token.VALUE_STRING))); + Fuzziness parse = Fuzziness.parse(parser); + assertThat(parse.asInt(), equalTo(intValue)); + assertThat((int) parse.asShort(), equalTo(intValue)); + assertThat((int) parse.asByte(), equalTo(intValue)); + assertThat(parse.asLong(), equalTo((long) intValue)); + if (value.intValue() >= 1) { + assertThat(parse.asDistance(), equalTo(Math.min(2, intValue))); + } + assertThat(parser.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + if (intValue.equals(value)) { + switch (intValue) { + case 1: + assertThat(parse, sameInstance(Fuzziness.ONE)); + break; + case 2: + assertThat(parse, sameInstance(Fuzziness.TWO)); + break; + case 0: + assertThat(parse, sameInstance(Fuzziness.ZERO)); + break; + default: + break; + } + } + } + { + XContent xcontent = XContentType.JSON.xContent(); + String json = jsonBuilder().startObject() + .field(Fuzziness.X_FIELD_NAME, randomBoolean() ? 
"AUTO" : "auto") + .endObject().string(); + if (randomBoolean()) { + json = Fuzziness.AUTO.toXContent(jsonBuilder().startObject(), null).endObject().string(); + } + XContentParser parser = xcontent.createParser(json); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); + Fuzziness parse = Fuzziness.parse(parser); + assertThat(parse, sameInstance(Fuzziness.AUTO)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + { + String[] values = new String[]{"d", "H", "ms", "s", "S", "w"}; + String actual = randomIntBetween(1, 3) + randomFrom(values); + XContent xcontent = XContentType.JSON.xContent(); + String json = jsonBuilder().startObject() + .field(Fuzziness.X_FIELD_NAME, actual) + .endObject().string(); + XContentParser parser = xcontent.createParser(json); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); + Fuzziness parse = Fuzziness.parse(parser); + assertThat(parse.asTimeValue(), equalTo(TimeValue.parseTimeValue(actual, null))); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + } + + } + + @Test + public void testAuto() { + final int codePoints = randomIntBetween(0, 10); + String string = randomRealisticUnicodeOfCodepointLength(codePoints); + if (codePoints <= 2) { + assertThat(Fuzziness.AUTO.asDistance(string), equalTo(0)); + assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(0)); + } else if (codePoints > 5) { + assertThat(Fuzziness.AUTO.asDistance(string), equalTo(2)); + assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(2)); + } else { + 
assertThat(Fuzziness.AUTO.asDistance(string), equalTo(1)); + assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(1)); + } + assertThat(Fuzziness.AUTO.asByte(), equalTo((byte) 1)); + assertThat(Fuzziness.AUTO.asInt(), equalTo(1)); + assertThat(Fuzziness.AUTO.asFloat(), equalTo(1f)); + assertThat(Fuzziness.AUTO.asDouble(), equalTo(1d)); + assertThat(Fuzziness.AUTO.asLong(), equalTo(1l)); + assertThat(Fuzziness.AUTO.asShort(), equalTo((short) 1)); + assertThat(Fuzziness.AUTO.asTimeValue(), equalTo(TimeValue.parseTimeValue("1", TimeValue.timeValueMillis(1)))); + + } + + @Test + public void testAsDistance() { + final int iters = atLeast(10); + for (int i = 0; i < iters; i++) { + Integer integer = Integer.valueOf(randomIntBetween(0, 10)); + String value = "" + (randomBoolean() ? integer.intValue() : integer.floatValue()); + assertThat(Fuzziness.build(value).asDistance(), equalTo(Math.min(2, integer.intValue()))); + } + } + + @Test + public void testSimilarityToDistance() { + assertThat(Fuzziness.fromSimilarity(0.5f).asDistance("ab"), equalTo(1)); + assertThat(Fuzziness.fromSimilarity(0.66f).asDistance("abcefg"), equalTo(2)); + assertThat(Fuzziness.fromSimilarity(0.8f).asDistance("ab"), equalTo(0)); + assertThat(Fuzziness.fromSimilarity(0.8f).asDistance("abcefg"), equalTo(1)); + assertThat((double) Fuzziness.ONE.asSimilarity("abcefg"), closeTo(0.8f, 0.05)); + assertThat((double) Fuzziness.TWO.asSimilarity("abcefg"), closeTo(0.66f, 0.05)); + assertThat((double) Fuzziness.ONE.asSimilarity("ab"), closeTo(0.5f, 0.05)); + + int iters = atLeast(100); + for (int i = 0; i < iters; i++) { + Fuzziness fuzziness = Fuzziness.fromEdits(between(1, 2)); + String string = rarely() ? 
randomRealisticUnicodeOfLengthBetween(2, 4) : + randomRealisticUnicodeOfLengthBetween(4, 10); + float similarity = fuzziness.asSimilarity(string); + if (similarity != 0.0f) { + Fuzziness similarityBased = Fuzziness.build(similarity); + assertThat((double) similarityBased.asSimilarity(string), closeTo(similarity, 0.05)); + assertThat(similarityBased.asDistance(string), equalTo(Math.min(2, fuzziness.asDistance(string)))); + } + } + } +} diff --git a/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java index 3eeaf702f6dcb..7da272676bade 100644 --- a/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java @@ -43,6 +43,7 @@ import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsModule; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexNameModule; import org.elasticsearch.index.analysis.AnalysisModule; @@ -432,7 +433,7 @@ public void testFuzzyQuery() throws IOException { @Test public void testFuzzyQueryWithFieldsBuilder() throws IOException { IndexQueryParserService queryParser = queryParser(); - Query parsedQuery = queryParser.parse(fuzzyQuery("name.first", "sh").minSimilarity(0.1f).prefixLength(1).boost(2.0f).buildAsBytes()).query(); + Query parsedQuery = queryParser.parse(fuzzyQuery("name.first", "sh").fuzziness(Fuzziness.fromSimilarity(0.1f)).prefixLength(1).boost(2.0f).buildAsBytes()).query(); assertThat(parsedQuery, instanceOf(FuzzyQuery.class)); FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery; assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh"))); diff --git a/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json 
b/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json index 6ef343ac4549f..3e3d30ffdc0f7 100644 --- a/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json +++ b/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json @@ -2,9 +2,9 @@ "fuzzy":{ "name.first":{ "value":"sh", - "min_similarity":0.1, + "fuzziness":0.1, "prefix_length":1, "boost":2.0 } } -} \ No newline at end of file +} diff --git a/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields2.json b/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields2.json index 371070f66c22b..095ecc6341d7f 100644 --- a/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields2.json +++ b/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields2.json @@ -2,8 +2,8 @@ "fuzzy":{ "age":{ "value":12, - "min_similarity":5, + "fuzziness":5, "boost":2.0 } } -} \ No newline at end of file +} diff --git a/src/test/java/org/elasticsearch/index/query/span-multi-term-fuzzy-range.json b/src/test/java/org/elasticsearch/index/query/span-multi-term-fuzzy-range.json index 4679170c85828..d9ca05b3f3ecd 100644 --- a/src/test/java/org/elasticsearch/index/query/span-multi-term-fuzzy-range.json +++ b/src/test/java/org/elasticsearch/index/query/span-multi-term-fuzzy-range.json @@ -4,7 +4,7 @@ "fuzzy":{ "age":{ "value":12, - "min_similarity":5, + "fuzziness":5, "boost":2.0 } } diff --git a/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java b/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java index a600bbcde4805..e80634a467ea5 100644 --- a/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java @@ -34,6 +34,7 @@ import org.elasticsearch.client.Requests; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.MapperException; import org.elasticsearch.index.mapper.MapperParsingException; @@ -502,7 +503,7 @@ public void testThatFuzzySuggesterSupportsEditDistances() throws Exception { // edit distance 2 suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Norw").size(10).setFuzzyEditDistance(2) + new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Norw").size(10).setFuzziness(Fuzziness.TWO) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); } @@ -520,12 +521,12 @@ public void testThatFuzzySuggesterSupportsTranspositions() throws Exception { refresh(); SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(false).setFuzzyEditDistance(1) + new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(false).setFuzziness(Fuzziness.ONE) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo"); suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(true).setFuzzyEditDistance(1) + new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(true).setFuzziness(Fuzziness.ONE) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); } @@ -601,7 +602,7 @@ public void testThatFuzzySuggesterIsUnicodeAware() throws Exception { assertSuggestions(suggestResponse, false, "foo"); // increasing edit distance instead of unicode awareness works again, as this is only a single character - completionSuggestionBuilder.setFuzzyEditDistance(2); + completionSuggestionBuilder.setFuzziness(Fuzziness.TWO); suggestResponse = 
client().prepareSuggest(INDEX).addSuggestion(completionSuggestionBuilder).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "ööööö"); }