From 9a10b13ae18828d3ebe31e19d68265d7b207461b Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 2 Jan 2014 16:45:24 +0100 Subject: [PATCH] Rename edit_distance/min_similarity to fuzziness A lot of different APIs currently use different names for the same logical parameter. Since Lucene moved away from the notion of a `similarity` and now uses a `fuzziness` we should generalize this and encapsulate the generation, parsing and creation of these settings across all queries. This commit adds a new `Fuzziness` class that handles the renaming and generalization in a backwards compatible manner. This commit also adds a ParseField class to better support deprecated Query DSL parameters. The ParseField class allows specifying parameters that have been deprecated. Those parameters can be more easily tracked and removed in a future version. This also allows running queries in `strict` mode per index to throw exceptions if a query is executed with deprecated keys. Closes #4082 --- docs/reference/api-conventions.asciidoc | 72 +++++ .../queries/flt-field-query.asciidoc | 4 +- .../query-dsl/queries/flt-query.asciidoc | 4 +- .../query-dsl/queries/fuzzy-query.asciidoc | 85 +++--- .../query-dsl/queries/match-query.asciidoc | 17 +- .../queries/query-string-query.asciidoc | 8 +- .../suggesters/completion-suggest.asciidoc | 7 +- .../classic/MapperQueryParser.java | 3 +- .../org/elasticsearch/common/ParseField.java | 74 +++++ .../elasticsearch/common/unit/Fuzziness.java | 256 ++++++++++++++++++ .../index/mapper/FieldMapper.java | 3 +- .../mapper/core/AbstractFieldMapper.java | 6 +- .../index/mapper/core/ByteFieldMapper.java | 10 +- .../index/mapper/core/DateFieldMapper.java | 8 +- .../index/mapper/core/DoubleFieldMapper.java | 5 +- .../index/mapper/core/FloatFieldMapper.java | 5 +- .../index/mapper/core/IntegerFieldMapper.java | 10 +- .../index/mapper/core/LongFieldMapper.java | 10 +- .../index/mapper/core/NumberFieldMapper.java | 3 +- .../index/mapper/core/ShortFieldMapper.java | 
10 +- .../mapper/internal/BoostFieldMapper.java | 5 +- .../index/mapper/ip/IpFieldMapper.java | 11 +- .../query/FuzzyLikeThisFieldQueryBuilder.java | 11 +- .../query/FuzzyLikeThisFieldQueryParser.java | 14 +- .../query/FuzzyLikeThisQueryBuilder.java | 11 +- .../index/query/FuzzyLikeThisQueryParser.java | 11 +- .../index/query/FuzzyQueryBuilder.java | 18 +- .../index/query/FuzzyQueryParser.java | 19 +- .../index/query/IndexQueryParserService.java | 7 + .../index/query/MatchQueryBuilder.java | 19 +- .../index/query/MatchQueryParser.java | 5 +- .../index/query/MultiMatchQueryBuilder.java | 9 +- .../index/query/MultiMatchQueryParser.java | 5 +- .../index/query/QueryParseContext.java | 18 +- .../index/query/QueryStringQueryBuilder.java | 16 +- .../index/query/QueryStringQueryParser.java | 7 +- .../index/search/MatchQuery.java | 10 +- .../completion/CompletionSuggestParser.java | 7 +- .../CompletionSuggestionFuzzyBuilder.java | 15 +- .../elasticsearch/common/ParseFieldTests.java | 74 +++++ .../common/unit/FuzzinessTests.java | 199 ++++++++++++++ .../query/SimpleIndexQueryParserTests.java | 3 +- .../index/query/fuzzy-with-fields.json | 4 +- .../index/query/fuzzy-with-fields2.json | 4 +- .../query/span-multi-term-fuzzy-range.json | 2 +- .../suggest/CompletionSuggestSearchTests.java | 9 +- 46 files changed, 917 insertions(+), 196 deletions(-) create mode 100644 src/main/java/org/elasticsearch/common/ParseField.java create mode 100644 src/main/java/org/elasticsearch/common/unit/Fuzziness.java create mode 100644 src/test/java/org/elasticsearch/common/ParseFieldTests.java create mode 100644 src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc index 28e4ba71779f2..0f1799fc3cf36 100644 --- a/docs/reference/api-conventions.asciidoc +++ b/docs/reference/api-conventions.asciidoc @@ -122,6 +122,21 @@ fields within a document indexed treated as boolean fields. 
All REST APIs support providing numbered parameters as `string` on top of supporting the native JSON number types. +[[time-units]] +[float] +=== Time units + +Whenever durations need to be specified, eg for a `timeout` parameter, the duration +can be specified as a whole number representing time in milliseconds, or as a time value like `2d` for 2 days. The supported units are: + +[horizontal] +`y`:: Year +`M`:: Month +`w`:: Week +`h`:: Hour +`m`:: Minute +`s`:: Second + [[distance-units]] [float] === Distance Units @@ -144,6 +159,63 @@ Centimeter:: `cm` or `centimeters` Millimeter:: `mm` or `millimeters` +[[fuzziness]] +[float] +=== Fuzziness + +Some queries and APIs support parameters to allow inexact _fuzzy_ matching, +using the `fuzziness` parameter. The `fuzziness` parameter is context +sensitive which means that it depends on the type of the field being queried: + +[float] +==== Numeric, date and IPv4 fields + +When querying numeric, date and IPv4 fields, `fuzziness` is interpreted as a +`+/-` margin. It behaves like a <<query-dsl-range-query>> where: + + value - fuzziness <= field value <= value + fuzziness + +The `fuzziness` parameter should be set to a numeric value, eg `2` or `2.0`. A +`date` field interprets a long as milliseconds, but also accepts a string +containing a time value -- `"1h"` -- as explained in <<time-units>>. An `ip` +field accepts a long or another IPv4 address (which will be converted into a +long). + +[float] +==== String fields + +When querying `string` fields, `fuzziness` is interpreted as a +http://en.wikipedia.org/wiki/Levenshtein_distance[Levenshtein Edit Distance] +-- the number of one character changes that need to be made to one string to +make it the same as another string. + +The `fuzziness` parameter can be specified as: + +`0`, `1`, `2`:: + +the maximum allowed Levenshtein Edit Distance (or number of edits) + +`AUTO`:: ++ +-- +generates an edit distance based on the length of the term. 
For lengths: + +`0..2`:: must match exactly +`3..5`:: one edit allowed +`>5`:: two edits allowed + +`AUTO` should generally be the preferred value for `fuzziness`. +-- + +`0.0..1.0`:: + +converted into an edit distance using the formula: `length(term) * (1.0 - +fuzziness)`, eg a `fuzziness` of `0.6` with a term of length 10 would result +in an edit distance of `4`. Note: in all APIs except for the +<<query-dsl-flt-query>>, the maximum allowed edit distance is `2`. + + + [float] === Result Casing diff --git a/docs/reference/query-dsl/queries/flt-field-query.asciidoc b/docs/reference/query-dsl/queries/flt-field-query.asciidoc index 734983c338976..205dc61307d55 100644 --- a/docs/reference/query-dsl/queries/flt-field-query.asciidoc +++ b/docs/reference/query-dsl/queries/flt-field-query.asciidoc @@ -33,8 +33,8 @@ The `fuzzy_like_this_field` top level parameters include: |`max_query_terms` |The maximum number of query terms that will be included in any generated query. Defaults to `25`. -|`min_similarity` |The minimum similarity of the term variants. Defaults -to `0.5`. +|`fuzziness` |The fuzziness of the term variants. Defaults +to `0.5`. See <<fuzziness>>. |`prefix_length` |Length of required common prefix on variant terms. Defaults to `0`. diff --git a/docs/reference/query-dsl/queries/flt-query.asciidoc b/docs/reference/query-dsl/queries/flt-query.asciidoc index beb49ea9367ab..231de6b6c048d 100644 --- a/docs/reference/query-dsl/queries/flt-query.asciidoc +++ b/docs/reference/query-dsl/queries/flt-query.asciidoc @@ -32,8 +32,8 @@ Defaults to the `_all` field. |`max_query_terms` |The maximum number of query terms that will be included in any generated query. Defaults to `25`. -|`min_similarity` |The minimum similarity of the term variants. Defaults -to `0.5`. +|`fuzziness` |The fuzziness of the term variants. Defaults +to `0.5`. See <<fuzziness>>. |`prefix_length` |Length of required common prefix on variant terms. Defaults to `0`. 
diff --git a/docs/reference/query-dsl/queries/fuzzy-query.asciidoc b/docs/reference/query-dsl/queries/fuzzy-query.asciidoc index 86a1062d16922..082f3f1a123f7 100644 --- a/docs/reference/query-dsl/queries/fuzzy-query.asciidoc +++ b/docs/reference/query-dsl/queries/fuzzy-query.asciidoc @@ -1,12 +1,15 @@ [[query-dsl-fuzzy-query]] === Fuzzy Query -A fuzzy query that uses similarity based on Levenshtein (edit -distance) algorithm. This maps to Lucene's `FuzzyQuery`. +The fuzzy query uses similarity based on Levenshtein edit distance for +`string` fields, and a `+/-` margin on numeric and date fields. -Warning: this query is not very scalable with its default prefix length -of 0 - in this case, *every* term will be enumerated and cause an edit -score calculation or `max_expansions` is not set. +==== String fields + +The `fuzzy` query generates all possible matching terms that are within the +maximum edit distance specified in `fuzziness` and then checks the term +dictionary to find out which of those generated terms actually exist in the +index. Here is a simple example: @@ -17,31 +20,57 @@ Here is a simple example: } -------------------------------------------------- -More complex settings can be set (the values here are the default -values): +Or with more advanced settings: [source,js] -------------------------------------------------- - { - "fuzzy" : { - "user" : { - "value" : "ki", - "boost" : 1.0, - "min_similarity" : 0.5, - "prefix_length" : 0 - } +{ + "fuzzy" : { + "user" : { + "value" : "ki", + "boost" : 1.0, + "fuzziness" : 2, + "prefix_length" : 0, + "max_expansions": 100 } } +} -------------------------------------------------- -The `max_expansions` parameter (unbounded by default) controls the -number of terms the fuzzy query will expand to. +[float] +===== Parameters + +[horizontal] +`fuzziness`:: + + The maximum edit distance. Defaults to `AUTO`. See <>. + +`prefix_length`:: + + The number of initial characters which will not be ``fuzzified''. 
This + helps to reduce the number of terms which must be examined. Defaults + to `0`. + +`max_expansions`:: + + The maximum number of terms that the `fuzzy` query will expand to. + Defaults to `0`. + + +WARNING: this query can be very heavy if `prefix_length` and `max_expansions` +are both set to their defaults of `0`. This could cause every term in the +index to be examined! + [float] -==== Numeric / Date Fuzzy +==== Numeric and date fields + +Performs a <<query-dsl-range-query>> ``around'' the value using the +`fuzziness` value as a `+/-` range, where: + + value - fuzziness <= field value <= value + fuzziness -`fuzzy` query on a numeric field will result in a range query "around" -the value using the `min_similarity` value. For example: +For example: [source,js] -------------------------------------------------- @@ -49,14 +78,14 @@ the value using the `min_similarity` value. For example: "fuzzy" : { "price" : { "value" : 12, - "min_similarity" : 2 + "fuzziness" : 2 } } } -------------------------------------------------- -Will result in a range query between 10 and 14. Same applies to dates, -with support for time format for the `min_similarity` field: +Will result in a range query between 10 and 14. Date fields support +<<time-units,time values>>, eg: [source,js] -------------------------------------------------- @@ -64,16 +93,10 @@ with support for time format for the `min_similarity` field: "fuzzy" : { "created" : { "value" : "2010-02-05T12:05:07", - "min_similarity" : "1d" + "fuzziness" : "1d" } } } -------------------------------------------------- -In the mapping, numeric and date types now allow to configure a -`fuzzy_factor` mapping value (defaults to 1), which will be used to -multiply the fuzzy value by it when used in a `query_string` type query. -For example, for dates, a fuzzy factor of "1d" will result in -multiplying whatever fuzzy value provided in the min_similarity by it. -Note, this is explicitly supported since query_string query only allowed -for similarity valued between 0.0 and 1.0. 
+See <> for more details about accepted values. diff --git a/docs/reference/query-dsl/queries/match-query.asciidoc b/docs/reference/query-dsl/queries/match-query.asciidoc index 5460cbff1e448..2bf8c8dff7853 100644 --- a/docs/reference/query-dsl/queries/match-query.asciidoc +++ b/docs/reference/query-dsl/queries/match-query.asciidoc @@ -34,9 +34,10 @@ The `analyzer` can be set to control which analyzer will perform the analysis process on the text. It default to the field explicit mapping definition, or the default search analyzer. -`fuzziness` can be set to a value (depending on the relevant type, for -string types it should be a value between `0.0` and `1.0`) to constructs -fuzzy queries for each term analyzed. The `prefix_length` and +`fuzziness` allows _fuzzy matching_ based on the type of field being queried. +See <> for allowed settings. + +The `prefix_length` and `max_expansions` can be set in this case to control the fuzzy process. If the fuzzy option is set the query will use `constant_score_rewrite` as its <> for allowed settings. |`fuzzy_prefix_length` |Set the prefix length for fuzzy queries. Default is `0`. @@ -70,7 +70,7 @@ in the resulting boolean query should match. It can be an absolute value both>>. |`lenient` |If set to `true` will cause format based failures (like -providing text to a numeric field) to be ignored. +providing text to a numeric field) to be ignored. |======================================================================= When a multi term query is being generated, one can control how it gets @@ -128,7 +128,7 @@ search on all "city" fields: Another option is to provide the wildcard fields search in the query string itself (properly escaping the `*` sign), for example: -`city.\*:something`. +`city.\*:something`. 
When running the `query_string` query against multiple fields, the following additional parameters are allowed: diff --git a/docs/reference/search/suggesters/completion-suggest.asciidoc b/docs/reference/search/suggesters/completion-suggest.asciidoc index da1b7fc2f339e..7c672db4418ab 100644 --- a/docs/reference/search/suggesters/completion-suggest.asciidoc +++ b/docs/reference/search/suggesters/completion-suggest.asciidoc @@ -199,7 +199,7 @@ curl -X POST 'localhost:9200/music/_suggest?pretty' -d '{ "completion" : { "field" : "suggest", "fuzzy" : { - "edit_distance" : 2 + "fuzziness" : 2 } } } @@ -210,8 +210,9 @@ The fuzzy query can take specific fuzzy parameters. The following parameters are supported: [horizontal] -`edit_distance`:: - Maximum edit distance, defaults to `1` +`fuzziness`:: + The fuzziness factor, defaults to `AUTO`. + See <> for allowed settings. `transpositions`:: Sets if transpositions should be counted diff --git a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index c007a5922f54d..75e2af0f909da 100644 --- a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -30,6 +30,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.lucene.search.XFilteredQuery; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.QueryParseContext; @@ -435,7 +436,7 @@ private Query getFuzzyQuerySingle(String field, String termStr, String minSimila if (currentMapper != null) { try { //LUCENE 4 UPGRADE I disabled transpositions here by default - maybe this needs to be changed - Query fuzzyQuery = currentMapper.fuzzyQuery(termStr, minSimilarity, 
fuzzyPrefixLength, settings.fuzzyMaxExpansions(), false); + Query fuzzyQuery = currentMapper.fuzzyQuery(termStr, Fuzziness.build(minSimilarity), fuzzyPrefixLength, settings.fuzzyMaxExpansions(), false); return wrapSmartNameQuery(fuzzyQuery, fieldMappers, parseContext); } catch (RuntimeException e) { if (settings.lenient()) { diff --git a/src/main/java/org/elasticsearch/common/ParseField.java b/src/main/java/org/elasticsearch/common/ParseField.java new file mode 100644 index 0000000000000..6e89e747890b9 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/ParseField.java @@ -0,0 +1,74 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.common; + +import org.elasticsearch.ElasticsearchIllegalArgumentException; + +import java.util.EnumSet; +import java.util.HashSet; + +/** + */ +public class ParseField { + private final String camelCaseName; + private final String underscoreName; + private final String[] deprecatedNames; + + public static final EnumSet EMPTY_FLAGS = EnumSet.noneOf(Flag.class); + + public static enum Flag { + STRICT + } + + public ParseField(String value, String... 
deprecatedNames) { + camelCaseName = Strings.toCamelCase(value); + underscoreName = Strings.toUnderscoreCase(value); + if (deprecatedNames == null || deprecatedNames.length == 0) { + this.deprecatedNames = Strings.EMPTY_ARRAY; + } else { + final HashSet set = new HashSet(); + for (String depName : deprecatedNames) { + set.add(Strings.toCamelCase(depName)); + set.add(Strings.toUnderscoreCase(depName)); + } + this.deprecatedNames = set.toArray(new String[0]); + } + } + + public ParseField withDeprecation(String... deprecatedNames) { + return new ParseField(this.underscoreName, deprecatedNames); + } + + public boolean match(String currentFieldName, EnumSet flags) { + if (currentFieldName.equals(camelCaseName) || currentFieldName.equals(underscoreName)) { + return true; + } + for (String depName : deprecatedNames) { + if (currentFieldName.equals(depName)) { + if (flags.contains(Flag.STRICT)) { + throw new ElasticsearchIllegalArgumentException("Deprecated field [" + currentFieldName + "] used expected [" + underscoreName + "] instead"); + } + return true; + } + } + return false; + } + + +} diff --git a/src/main/java/org/elasticsearch/common/unit/Fuzziness.java b/src/main/java/org/elasticsearch/common/unit/Fuzziness.java new file mode 100644 index 0000000000000..712b37abcf906 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/unit/Fuzziness.java @@ -0,0 +1,256 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.common.unit; + +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.util.automaton.LevenshteinAutomata; +import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Preconditions; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; + +/** + * A unit class that encapsulates all in-exact search + * parsing and conversion from similarities to edit distances + * etc. 
+ */ +public final class Fuzziness implements ToXContent { + + public static final XContentBuilderString X_FIELD_NAME = new XContentBuilderString("fuzziness"); + public static final Fuzziness ZERO = new Fuzziness(0); + public static final Fuzziness ONE = new Fuzziness(1); + public static final Fuzziness TWO = new Fuzziness(2); + public static final Fuzziness AUTO = new Fuzziness("AUTO"); + public static final ParseField FIELD = new ParseField(X_FIELD_NAME.camelCase().getValue()); + + private final Object fuzziness; + + private Fuzziness(int fuzziness) { + Preconditions.checkArgument(fuzziness >= 0 && fuzziness <= 2, "Valid edit distances are [0, 1, 2] but was [" + fuzziness + "]"); + this.fuzziness = fuzziness; + } + + private Fuzziness(float fuzziness) { + Preconditions.checkArgument(fuzziness >= 0.0 && fuzziness < 1.0f, "Valid similarities must be in the interval [0..1] but was [" + fuzziness + "]"); + this.fuzziness = fuzziness; + } + + private Fuzziness(String fuzziness) { + this.fuzziness = fuzziness; + } + + /** + * Creates a {@link Fuzziness} instance from a similarity. The value must be in the range [0..1) + */ + public static Fuzziness fromSimilarity(float similarity) { + return new Fuzziness(similarity); + } + + /** + * Creates a {@link Fuzziness} instance from an edit distance. 
The value must be one of [0, 1, 2] + */ + public static Fuzziness fromEdits(int edits) { + return new Fuzziness(edits); + } + + public static Fuzziness build(Object fuzziness) { + if (fuzziness instanceof Fuzziness) { + return (Fuzziness) fuzziness; + } + String string = fuzziness.toString(); + if (AUTO.asString().equalsIgnoreCase(string)) { + return AUTO; + } + return new Fuzziness(string); + } + + public static Fuzziness parse(XContentParser parser) throws IOException { + XContentParser.Token token = parser.currentToken(); + switch (token) { + case VALUE_STRING: + case VALUE_NUMBER: + final String fuzziness = parser.text(); + if (AUTO.asString().equalsIgnoreCase(fuzziness)) { + return AUTO; + } + try { + final int minimumSimilarity = Integer.parseInt(fuzziness); + switch (minimumSimilarity) { + case 0: + return ZERO; + case 1: + return ONE; + case 2: + return TWO; + default: + return build(fuzziness); + } + } catch (NumberFormatException ex) { + return build(fuzziness); + } + + default: + throw new ElasticsearchIllegalArgumentException("Can't parse fuzziness on token: [" + token + "]"); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return toXContent(builder, params, true); + } + + public XContentBuilder toXContent(XContentBuilder builder, Params params, boolean includeFieldName) throws IOException { + if (includeFieldName) { + builder.field(X_FIELD_NAME, fuzziness); + } else { + builder.value(fuzziness); + } + return builder; + } + + public int asDistance() { + return asDistance(null); + } + + public int asDistance(String text) { + if (fuzziness instanceof String) { + if (this == AUTO) { //AUTO + final int len = termLen(text); + if (len <= 2) { + return 0; + } else if (len > 5) { + return 2; + } else { + return 1; + } + } + } + return FuzzyQuery.floatToEdits(asFloat(), termLen(text)); + } + + public TimeValue asTimeValue() { + if (this == AUTO) { + return TimeValue.timeValueMillis(1); + } 
else { + return TimeValue.parseTimeValue(fuzziness.toString(), null); + } + } + + public long asLong() { + if (this == AUTO) { + return 1; + } + try { + return Long.parseLong(fuzziness.toString()); + } catch (NumberFormatException ex) { + return (long) Double.parseDouble(fuzziness.toString()); + } + } + + public int asInt() { + if (this == AUTO) { + return 1; + } + try { + return Integer.parseInt(fuzziness.toString()); + } catch (NumberFormatException ex) { + return (int) Float.parseFloat(fuzziness.toString()); + } + } + + public short asShort() { + if (this == AUTO) { + return 1; + } + try { + return Short.parseShort(fuzziness.toString()); + } catch (NumberFormatException ex) { + return (short) Float.parseFloat(fuzziness.toString()); + } + } + + public byte asByte() { + if (this == AUTO) { + return 1; + } + try { + return Byte.parseByte(fuzziness.toString()); + } catch (NumberFormatException ex) { + return (byte) Float.parseFloat(fuzziness.toString()); + } + } + + public double asDouble() { + if (this == AUTO) { + return 1d; + } + return Double.parseDouble(fuzziness.toString()); + } + + public float asFloat() { + if (this == AUTO) { + return 1f; + } + return Float.parseFloat(fuzziness.toString()); + } + + public float asSimilarity() { + return asSimilarity(null); + } + + public float asSimilarity(String text) { + if (this == AUTO) { + final int len = termLen(text); + if (len <= 2) { + return 0.0f; + } else if (len > 5) { + return 0.5f; + } else { + return 0.66f; + } +// return dist == 0 ? dist : Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist/ (float) termLen(text)))); + } + if (fuzziness instanceof Float) { // it's a similarity + return ((Float) fuzziness).floatValue(); + } else if (fuzziness instanceof Integer) { // it's an edit! 
+ int dist = Math.min(((Integer) fuzziness).intValue(), + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); + return Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist / (float) termLen(text)))); + } else { + final float similarity = Float.parseFloat(fuzziness.toString()); + if (similarity >= 0.0f && similarity < 1.0f) { + return similarity; + } + } + throw new ElasticsearchIllegalArgumentException("Can't get similarity from fuzziness [" + fuzziness + "]"); + } + + private int termLen(String text) { + return text == null ? 5 : text.codePointCount(0, text.length()); // 5 avg term length in english + } + + public String asString() { + return fuzziness.toString(); + } +} diff --git a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 6b9abf19628e2..181053fb58e12 100644 --- a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider; import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; import org.elasticsearch.index.fielddata.FieldDataType; @@ -214,7 +215,7 @@ public static Loading parse(String loading, Loading defaultValue) { Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context); - Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions); + Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions); Query prefixQuery(Object value, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context); diff --git 
a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java index 06631f8160fb0..51b2f07823dbc 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java @@ -37,6 +37,7 @@ import org.elasticsearch.common.lucene.search.RegexpFilter; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider; @@ -466,9 +467,8 @@ public Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLow } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { - int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSim), value.codePointCount(0, value.length())); - return new FuzzyQuery(names.createIndexNameTerm(indexedValueForSearch(value)), edits, prefixLength, maxExpansions, transpositions); + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { + return new FuzzyQuery(names.createIndexNameTerm(indexedValueForSearch(value)), fuzziness.asDistance(value), prefixLength, maxExpansions, transpositions); } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java index a1e92c87b9298..6ceeada7414d7 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java @@ -34,6 +34,7 @@ import org.elasticsearch.common.Nullable; import 
org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -181,14 +182,9 @@ private int parseValueAsInt(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { byte iValue = Byte.parseByte(value); - byte iSim; - try { - iSim = Byte.parseByte(minSim); - } catch (NumberFormatException e) { - iSim = (byte) Float.parseFloat(minSim); - } + byte iSim = fuzziness.asByte(); return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java index bc8847f84d2a4..81d1a09cf1561 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java @@ -36,7 +36,7 @@ import org.elasticsearch.common.joda.FormatDateTimeFormatter; import org.elasticsearch.common.joda.Joda; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -291,14 +291,14 @@ private String convertToString(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int 
prefixLength, int maxExpansions, boolean transpositions) { long iValue = dateMathParser.parse(value, System.currentTimeMillis()); long iSim; try { - iSim = TimeValue.parseTimeValue(minSim, null).millis(); + iSim = fuzziness.asTimeValue().millis(); } catch (Exception e) { // not a time format - iSim = (long) Double.parseDouble(minSim); + iSim = fuzziness.asLong(); } return NumericRangeQuery.newLongRange(names.indexName(), precisionStep, iValue - iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java index 57cc74db92d9f..4f5e38b3dcbe7 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java @@ -36,6 +36,7 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Numbers; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -171,9 +172,9 @@ public BytesRef indexedValueForSearch(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { double iValue = Double.parseDouble(value); - double iSim = Double.parseDouble(minSim); + double iSim = fuzziness.asDouble(); return NumericRangeQuery.newDoubleRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java index ceb11c9cf6920..203b1eb0ba659 100644 --- 
a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java @@ -37,6 +37,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -181,9 +182,9 @@ private float parseValue(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { float iValue = Float.parseFloat(value); - float iSim = Float.parseFloat(minSim); + final float iSim = fuzziness.asFloat(); return NumericRangeQuery.newFloatRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java index 0c76b8bc316a8..c999bdfce132a 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericIntegerAnalyzer; @@ -176,14 +177,9 @@ private int parseValue(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public 
Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { int iValue = Integer.parseInt(value); - int iSim; - try { - iSim = Integer.parseInt(minSim); - } catch (NumberFormatException e) { - iSim = (int) Float.parseFloat(minSim); - } + int iSim = fuzziness.asInt(); return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java index ad7ec404568d6..639fcd7f343b3 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericLongAnalyzer; @@ -165,14 +166,9 @@ public BytesRef indexedValueForSearch(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { long iValue = Long.parseLong(value); - long iSim; - try { - iSim = Long.parseLong(minSim); - } catch (NumberFormatException e) { - iSim = (long) Double.parseDouble(minSim); - } + final long iSim = fuzziness.asLong(); return NumericRangeQuery.newLongRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java index ec6af5644f5a6..7fc7f96bb0772 
100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java @@ -39,6 +39,7 @@ import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -239,7 +240,7 @@ public Filter termFilter(Object value, @Nullable QueryParseContext context) { public abstract Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context); @Override - public abstract Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions); + public abstract Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions); /** * A range filter based on the field data cache. 
diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java index 4820f248250b1..1d75ec0984366 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -180,14 +181,9 @@ private int parseValueAsInt(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { short iValue = Short.parseShort(value); - short iSim; - try { - iSim = Short.parseShort(minSim); - } catch (NumberFormatException e) { - iSim = (short) Float.parseFloat(minSim); - } + short iSim = fuzziness.asShort(); return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java index 294fd7f6d2355..32b3a50efad6b 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericFloatAnalyzer; @@ -183,9 +184,9 @@ private float parseValue(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { float iValue = Float.parseFloat(value); - float iSim = Float.parseFloat(minSim); + float iSim = fuzziness.asFloat(); return NumericRangeQuery.newFloatRange(names.indexName(), precisionStep, iValue - iSim, iValue + iSim, diff --git a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java index bfc5ea088cf16..f7ad916219072 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java @@ -34,6 +34,7 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -216,17 +217,13 @@ private long parseValue(Object value) { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { long iValue = ipToLong(value); long iSim; try { - iSim = ipToLong(minSim); + iSim = ipToLong(fuzziness.asString()); } catch (ElasticsearchIllegalArgumentException e) { - try { - iSim = Long.parseLong(minSim); - } catch (NumberFormatException e1) { - iSim = (long) 
Double.parseDouble(minSim); - } + iSim = fuzziness.asLong(); } return NumericRangeQuery.newLongRange(names.indexName(), precisionStep, iValue - iSim, diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryBuilder.java index e0d007e1195c1..f9846d0044bce 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -34,7 +35,7 @@ public class FuzzyLikeThisFieldQueryBuilder extends BaseQueryBuilder implements private Float boost; private String likeText = null; - private Float minSimilarity; + private Fuzziness fuzziness; private Integer prefixLength; private Integer maxQueryTerms; private Boolean ignoreTF; @@ -59,8 +60,8 @@ public FuzzyLikeThisFieldQueryBuilder likeText(String likeText) { return this; } - public FuzzyLikeThisFieldQueryBuilder minSimilarity(float minSimilarity) { - this.minSimilarity = minSimilarity; + public FuzzyLikeThisFieldQueryBuilder fuzziness(Fuzziness fuzziness) { + this.fuzziness = fuzziness; return this; } @@ -119,8 +120,8 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep if (maxQueryTerms != null) { builder.field("max_query_terms", maxQueryTerms); } - if (minSimilarity != null) { - builder.field("min_similarity", minSimilarity); + if (fuzziness != null) { + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java 
b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java index b2fcc39fc0b5b..8088281553d68 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java @@ -23,8 +23,10 @@ import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.Analysis; import org.elasticsearch.index.mapper.MapperService; @@ -48,6 +50,8 @@ public class FuzzyLikeThisFieldQueryParser implements QueryParser { public static final String NAME = "flt_field"; + private static final Fuzziness DEFAULT_FUZZINESS = Fuzziness.fromSimilarity(0.5f); + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("min_similarity"); @Inject public FuzzyLikeThisFieldQueryParser() { @@ -65,7 +69,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars int maxNumTerms = 25; float boost = 1.0f; String likeText = null; - float minSimilarity = 0.5f; + Fuzziness fuzziness = DEFAULT_FUZZINESS; int prefixLength = 0; boolean ignoreTF = false; Analyzer analyzer = null; @@ -98,8 +102,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars boost = parser.floatValue(); } else if ("ignore_tf".equals(currentFieldName) || "ignoreTF".equals(currentFieldName)) { ignoreTF = parser.booleanValue(); - } else if ("min_similarity".equals(currentFieldName) || "minSimilarity".equals(currentFieldName)) { - minSimilarity = parser.floatValue(); + } else if (FUZZINESS.match(currentFieldName, parseContext.parseFlags())) { + fuzziness = 
Fuzziness.parse(parser); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { prefixLength = parser.intValue(); } else if ("analyzer".equals(currentFieldName)) { @@ -139,7 +143,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars } FuzzyLikeThisQuery fuzzyLikeThisQuery = new FuzzyLikeThisQuery(maxNumTerms, analyzer); - fuzzyLikeThisQuery.addTerms(likeText, fieldName, minSimilarity, prefixLength); + fuzzyLikeThisQuery.addTerms(likeText, fieldName, fuzziness.asSimilarity(), prefixLength); fuzzyLikeThisQuery.setBoost(boost); fuzzyLikeThisQuery.setIgnoreTF(ignoreTF); @@ -156,4 +160,4 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars } return query; } -} \ No newline at end of file +} diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryBuilder.java index 7b0b20af13156..fafe60e1edb52 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -34,7 +35,7 @@ public class FuzzyLikeThisQueryBuilder extends BaseQueryBuilder implements Boost private Float boost; private String likeText = null; - private Float minSimilarity; + private Fuzziness fuzziness; private Integer prefixLength; private Integer maxQueryTerms; private Boolean ignoreTF; @@ -66,8 +67,8 @@ public FuzzyLikeThisQueryBuilder likeText(String likeText) { return this; } - public FuzzyLikeThisQueryBuilder minSimilarity(float minSimilarity) { - this.minSimilarity = minSimilarity; + public FuzzyLikeThisQueryBuilder fuzziness(Fuzziness fuzziness) { 
+ this.fuzziness = fuzziness; return this; } @@ -132,8 +133,8 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep if (maxQueryTerms != null) { builder.field("max_query_terms", maxQueryTerms); } - if (minSimilarity != null) { - builder.field("min_similarity", minSimilarity); + if (fuzziness != null) { + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java index 73754cfb6aceb..610a136b3920d 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java @@ -24,7 +24,9 @@ import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.Analysis; @@ -47,6 +49,7 @@ public class FuzzyLikeThisQueryParser implements QueryParser { public static final String NAME = "flt"; + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("min_similarity"); @Inject public FuzzyLikeThisQueryParser() { @@ -65,7 +68,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars float boost = 1.0f; List fields = null; String likeText = null; - float minSimilarity = 0.5f; + Fuzziness fuzziness = Fuzziness.TWO; int prefixLength = 0; boolean ignoreTF = false; Analyzer analyzer = null; @@ -86,8 +89,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars boost = parser.floatValue(); } else if ("ignore_tf".equals(currentFieldName) || 
"ignoreTF".equals(currentFieldName)) { ignoreTF = parser.booleanValue(); - } else if ("min_similarity".equals(currentFieldName) || "minSimilarity".equals(currentFieldName)) { - minSimilarity = parser.floatValue(); + } else if (FUZZINESS.match(currentFieldName, parseContext.parseFlags())) { + fuzziness = Fuzziness.parse(parser); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { prefixLength = parser.intValue(); } else if ("analyzer".equals(currentFieldName)) { @@ -139,7 +142,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars return null; } for (String field : fields) { - query.addTerms(likeText, field, minSimilarity, prefixLength); + query.addTerms(likeText, field, fuzziness.asSimilarity(), prefixLength); } query.setBoost(boost); query.setIgnoreTF(ignoreTF); diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java index c675c169befdb..ab158fb1acc68 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.query; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -36,7 +37,7 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements MultiTermQuer private float boost = -1; - private String minSimilarity; + private Fuzziness fuzziness; private Integer prefixLength; @@ -67,13 +68,8 @@ public FuzzyQueryBuilder boost(float boost) { return this; } - public FuzzyQueryBuilder minSimilarity(float defaultMinSimilarity) { - this.minSimilarity = Float.toString(defaultMinSimilarity); - return this; - } - - public FuzzyQueryBuilder minSimilarity(String defaultMinSimilarity) { - this.minSimilarity = defaultMinSimilarity; + public FuzzyQueryBuilder fuzziness(Fuzziness 
fuzziness) { + this.fuzziness = fuzziness; return this; } @@ -103,7 +99,7 @@ public FuzzyQueryBuilder queryName(String queryName) { @Override public void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(FuzzyQueryParser.NAME); - if (boost == -1 && minSimilarity == null && prefixLength == null && queryName != null) { + if (boost == -1 && fuzziness == null && prefixLength == null && queryName != null) { builder.field(name, value); } else { builder.startObject(name); @@ -114,8 +110,8 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio if (transpositions != null) { builder.field("transpositions", transpositions); } - if (minSimilarity != null) { - builder.field("min_similarity", minSimilarity); + if (fuzziness != null) { + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java index cf0d540cd9dc8..1d13c3ca06f9b 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java @@ -23,7 +23,9 @@ import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.support.QueryParsers; @@ -38,6 +40,9 @@ public class FuzzyQueryParser implements QueryParser { public static final String NAME = "fuzzy"; + private static final Fuzziness DEFAULT_FUZZINESS = Fuzziness.AUTO; + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("min_similarity"); + 
@Inject public FuzzyQueryParser() { @@ -60,8 +65,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars String value = null; float boost = 1.0f; - //LUCENE 4 UPGRADE we should find a good default here I'd vote for 1.0 -> 1 edit - String minSimilarity = "0.5"; + Fuzziness fuzziness = DEFAULT_FUZZINESS; int prefixLength = FuzzyQuery.defaultPrefixLength; int maxExpansions = FuzzyQuery.defaultMaxExpansions; boolean transpositions = false; @@ -80,8 +84,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars value = parser.text(); } else if ("boost".equals(currentFieldName)) { boost = parser.floatValue(); - } else if ("min_similarity".equals(currentFieldName) || "minSimilarity".equals(currentFieldName)) { - minSimilarity = parser.text(); + } else if (FUZZINESS.match(currentFieldName, parseContext.parseFlags())) { + fuzziness = Fuzziness.parse(parser); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { prefixLength = parser.intValue(); } else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) { @@ -112,14 +116,11 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { - query = smartNameFieldMappers.mapper().fuzzyQuery(value, minSimilarity, prefixLength, maxExpansions, transpositions); + query = smartNameFieldMappers.mapper().fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions); } } if (query == null) { - //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float - int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSimilarity), - value.codePointCount(0, value.length())); - query = new FuzzyQuery(new Term(fieldName, value), edits, prefixLength, maxExpansions, 
transpositions); + query = new FuzzyQuery(new Term(fieldName, value), fuzziness.asDistance(value), prefixLength, maxExpansions, transpositions); } if (query instanceof MultiTermQuery) { QueryParsers.setRewriteMethod((MultiTermQuery) query, rewriteMethod); diff --git a/src/main/java/org/elasticsearch/index/query/IndexQueryParserService.java b/src/main/java/org/elasticsearch/index/query/IndexQueryParserService.java index afb099d24fdbc..3701f1a0eb4ab 100644 --- a/src/main/java/org/elasticsearch/index/query/IndexQueryParserService.java +++ b/src/main/java/org/elasticsearch/index/query/IndexQueryParserService.java @@ -26,6 +26,7 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.cache.recycler.CacheRecycler; import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.Queries; @@ -47,6 +48,7 @@ import org.elasticsearch.script.ScriptService; import java.io.IOException; +import java.util.EnumSet; import java.util.List; import java.util.Map; @@ -93,6 +95,7 @@ protected QueryParseContext initialValue() { private String defaultField; private boolean queryStringLenient; + private final boolean strict; @Inject public IndexQueryParserService(Index index, @IndexSettings Settings indexSettings, @@ -114,6 +117,7 @@ public IndexQueryParserService(Index index, @IndexSettings Settings indexSetting this.defaultField = indexSettings.get("index.query.default_field", AllFieldMapper.NAME); this.queryStringLenient = indexSettings.getAsBoolean("index.query_string.lenient", false); + this.strict = indexSettings.getAsBoolean("index.query.parse.strict", false); List queryParsers = newArrayList(); if (namedQueryParsers != null) { @@ -311,6 +315,9 @@ public ParsedQuery parseQuery(BytesReference source) { private ParsedQuery parse(QueryParseContext parseContext, XContentParser parser) throws 
IOException, QueryParsingException { parseContext.reset(parser); + if (strict) { + parseContext.parseFlags(EnumSet.of(ParseField.Flag.STRICT)); + } Query query = parseContext.parseInnerQuery(); if (query == null) { query = Queries.newMatchNoDocsQuery(); diff --git a/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java index 93ce032b2e0d8..ba985089eaf9c 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.query; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -69,7 +70,7 @@ public static enum ZeroTermsQuery { private Integer slop; - private String fuzziness; + private Fuzziness fuzziness; private Integer prefixLength; @@ -82,11 +83,11 @@ public static enum ZeroTermsQuery { private String fuzzyRewrite = null; private Boolean lenient; - + private Boolean fuzzyTranspositions = null; private ZeroTermsQuery zeroTermsQuery; - + private Float cutoff_Frequency = null; private String queryName; @@ -141,10 +142,10 @@ public MatchQueryBuilder slop(int slop) { } /** - * Sets the minimum similarity used when evaluated to a fuzzy query type. Defaults to "0.5". + * Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". 
*/ public MatchQueryBuilder fuzziness(Object fuzziness) { - this.fuzziness = fuzziness.toString(); + this.fuzziness = Fuzziness.build(fuzziness); return this; } @@ -161,7 +162,7 @@ public MatchQueryBuilder maxExpansions(int maxExpansions) { this.maxExpansions = maxExpansions; return this; } - + /** * Set a cutoff value in [0..1] (or absolute number >=1) representing the * maximum threshold of a terms document frequency to be considered a low @@ -186,7 +187,7 @@ public MatchQueryBuilder fuzzyRewrite(String fuzzyRewrite) { this.fuzzyRewrite = fuzzyRewrite; return this; } - + public MatchQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { //LUCENE 4 UPGRADE add documentation this.fuzzyTranspositions = fuzzyTranspositions; @@ -236,7 +237,7 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio builder.field("slop", slop); } if (fuzziness != null) { - builder.field("fuzziness", fuzziness); + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); @@ -269,7 +270,7 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio if (queryName != null) { builder.field("_name", queryName); } - + builder.endObject(); builder.endObject(); diff --git a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java index 4708b24e24c80..ba8f8536154ec 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java @@ -25,6 +25,7 @@ import org.apache.lucene.search.Query; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.search.MatchQuery; @@ -102,8 +103,8 @@ public 
Query parse(QueryParseContext parseContext) throws IOException, QueryPars boost = parser.floatValue(); } else if ("slop".equals(currentFieldName) || "phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) { matchQuery.setPhraseSlop(parser.intValue()); - } else if ("fuzziness".equals(currentFieldName)) { - matchQuery.setFuzziness(parser.textOrNull()); + } else if (Fuzziness.FIELD.match(currentFieldName, parseContext.parseFlags())) { + matchQuery.setFuzziness(Fuzziness.parse(parser)); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { matchQuery.setFuzzyPrefixLength(parser.intValue()); } else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) { diff --git a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index b11ac311969c3..19c8177576cb0 100644 --- a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -21,6 +21,7 @@ import com.carrotsearch.hppc.ObjectFloatOpenHashMap; import com.google.common.collect.Lists; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -48,7 +49,7 @@ public class MultiMatchQueryBuilder extends BaseQueryBuilder implements Boostabl private Integer slop; - private String fuzziness; + private Fuzziness fuzziness; private Integer prefixLength; @@ -143,10 +144,10 @@ public MultiMatchQueryBuilder slop(int slop) { } /** - * Sets the minimum similarity used when evaluated to a fuzzy query type. Defaults to "0.5". + * Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". 
*/ public MultiMatchQueryBuilder fuzziness(Object fuzziness) { - this.fuzziness = fuzziness.toString(); + this.fuzziness = Fuzziness.build(fuzziness); return this; } @@ -252,7 +253,7 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio builder.field("slop", slop); } if (fuzziness != null) { - builder.field("fuzziness", fuzziness); + fuzziness.toXContent(builder, params); } if (prefixLength != null) { builder.field("prefix_length", prefixLength); diff --git a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java index 7a3ead68a8a14..68cdedc7a1d16 100644 --- a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java @@ -24,6 +24,7 @@ import org.apache.lucene.search.Query; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.search.MatchQuery; @@ -99,8 +100,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars boost = parser.floatValue(); } else if ("slop".equals(currentFieldName) || "phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) { multiMatchQuery.setPhraseSlop(parser.intValue()); - } else if ("fuzziness".equals(currentFieldName)) { - multiMatchQuery.setFuzziness(parser.textOrNull()); + } else if (Fuzziness.FIELD.match(currentFieldName, parseContext.parseFlags())) { + multiMatchQuery.setFuzziness(Fuzziness.parse(parser)); } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { multiMatchQuery.setFuzzyPrefixLength(parser.intValue()); } else if ("max_expansions".equals(currentFieldName) || 
"maxExpansions".equals(currentFieldName)) { diff --git a/src/main/java/org/elasticsearch/index/query/QueryParseContext.java b/src/main/java/org/elasticsearch/index/query/QueryParseContext.java index f0860db4c1b5e..43eb4d86c2da6 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryParseContext.java +++ b/src/main/java/org/elasticsearch/index/query/QueryParseContext.java @@ -29,6 +29,7 @@ import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.cache.recycler.CacheRecycler; import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.Index; import org.elasticsearch.index.analysis.AnalysisService; @@ -45,10 +46,7 @@ import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * @@ -85,12 +83,24 @@ public static void removeTypes() { private XContentParser parser; + private EnumSet parseFlags = ParseField.EMPTY_FLAGS; + + public QueryParseContext(Index index, IndexQueryParserService indexQueryParser) { this.index = index; this.indexQueryParser = indexQueryParser; } + public void parseFlags(EnumSet parseFlags) { + this.parseFlags = parseFlags == null ? 
ParseField.EMPTY_FLAGS : parseFlags; + } + + public EnumSet parseFlags() { + return parseFlags; + } + public void reset(XContentParser jp) { + this.parseFlags = ParseField.EMPTY_FLAGS; this.lookup = null; this.parser = jp; this.namedFilters.clear(); diff --git a/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java index 7d99d41ff1d84..b01ba1e590580 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import com.carrotsearch.hppc.ObjectFloatOpenHashMap; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; @@ -35,7 +36,6 @@ * (using {@link #field(String)}), will run the parsed query against the provided fields, and combine * them either using DisMax or a plain boolean query (see {@link #useDisMax(boolean)}). *

- * (shay.baon) */ public class QueryStringQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder { @@ -68,7 +68,7 @@ public static enum Operator { private float boost = -1; - private float fuzzyMinSim = -1; + private Fuzziness fuzziness; private int fuzzyPrefixLength = -1; private int fuzzyMaxExpansions = -1; private String fuzzyRewrite; @@ -226,15 +226,15 @@ public QueryStringQueryBuilder enablePositionIncrements(boolean enablePositionIn } /** - * Set the minimum similarity for fuzzy queries. Default is 0.5f. + * Set the edit distance for fuzzy queries. Default is "AUTO". */ - public QueryStringQueryBuilder fuzzyMinSim(float fuzzyMinSim) { - this.fuzzyMinSim = fuzzyMinSim; + public QueryStringQueryBuilder fuzziness(Fuzziness fuzziness) { + this.fuzziness = fuzziness; return this; } /** - * Set the minimum similarity for fuzzy queries. Default is 0.5f. + * Set the minimum prefix length for fuzzy queries. Default is 1. */ public QueryStringQueryBuilder fuzzyPrefixLength(int fuzzyPrefixLength) { this.fuzzyPrefixLength = fuzzyPrefixLength; @@ -356,8 +356,8 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep if (enablePositionIncrements != null) { builder.field("enable_position_increments", enablePositionIncrements); } - if (fuzzyMinSim != -1) { - builder.field("fuzzy_min_sim", fuzzyMinSim); + if (fuzziness != null) { + fuzziness.toXContent(builder, params); } if (boost != -1) { builder.field("boost", boost); diff --git a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java index 00f2aeed6fcd3..6d3dfc4d245aa 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java @@ -25,11 +25,13 @@ import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.BooleanQuery; import 
org.apache.lucene.search.Query; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.query.support.QueryParsers; @@ -45,6 +47,7 @@ public class QueryStringQueryParser implements QueryParser { public static final String NAME = "query_string"; + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("fuzzy_min_sim"); private final boolean defaultAnalyzeWildcard; private final boolean defaultAllowLeadingWildcard; @@ -167,8 +170,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull())); } else if ("phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) { qpSettings.phraseSlop(parser.intValue()); - } else if ("fuzzy_min_sim".equals(currentFieldName) || "fuzzyMinSim".equals(currentFieldName)) { - qpSettings.fuzzyMinSim(parser.floatValue()); + } else if (FUZZINESS.match(currentFieldName, parseContext.parseFlags())) { + qpSettings.fuzzyMinSim(Fuzziness.parse(parser).asSimilarity()); } else if ("boost".equals(currentFieldName)) { qpSettings.boost(parser.floatValue()); } else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) { diff --git a/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 51cce04f87c0b..bc5e9e3a45ea5 100644 --- a/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -35,6 +35,7 @@ import 
org.elasticsearch.common.Nullable; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.QueryParseContext; @@ -69,7 +70,7 @@ public static enum ZeroTermsQuery { protected int phraseSlop = 0; - protected String fuzziness = null; + protected Fuzziness fuzziness = null; protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; @@ -112,7 +113,7 @@ public void setPhraseSlop(int phraseSlop) { this.phraseSlop = phraseSlop; } - public void setFuzziness(String fuzziness) { + public void setFuzziness(Fuzziness fuzziness) { this.fuzziness = fuzziness; } @@ -365,10 +366,7 @@ private Query newTermQuery(@Nullable FieldMapper mapper, Term term) { QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod); } } - String text = term.text(); - //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float - int edits = FuzzyQuery.floatToEdits(Float.parseFloat(fuzziness), - text.codePointCount(0, text.length())); + int edits = fuzziness.asDistance(term.text()); FuzzyQuery query = new FuzzyQuery(term, edits, fuzzyPrefixLength, maxExpansions, transpositions); QueryParsers.setRewriteMethod(query, rewriteMethod); return query; diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java index cbee27b2915bf..2be279c59e23e 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java @@ -19,6 +19,8 @@ package org.elasticsearch.search.suggest.completion; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import 
org.elasticsearch.common.ParseField; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.suggest.SuggestContextParser; @@ -34,6 +36,7 @@ public class CompletionSuggestParser implements SuggestContextParser { private CompletionSuggester completionSuggester; + private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("edit_distance"); public CompletionSuggestParser(CompletionSuggester completionSuggester) { this.completionSuggester = completionSuggester; @@ -60,8 +63,8 @@ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, Ma if (token == XContentParser.Token.FIELD_NAME) { fuzzyConfigName = parser.currentName(); } else if (token.isValue()) { - if ("edit_distance".equals(fuzzyConfigName) || "editDistance".equals(fuzzyConfigName)) { - suggestion.setFuzzyEditDistance(parser.intValue()); + if (FUZZINESS.match(fuzzyConfigName, ParseField.EMPTY_FLAGS)) { + suggestion.setFuzzyEditDistance(Fuzziness.parse(parser).asDistance()); } else if ("transpositions".equals(fuzzyConfigName)) { suggestion.setFuzzyTranspositions(parser.booleanValue()); } else if ("min_length".equals(fuzzyConfigName) || "minLength".equals(fuzzyConfigName)) { diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java index 2c67d2e5f96e6..2059a28d0ae73 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.suggest.completion; import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester; +import org.elasticsearch.common.unit.Fuzziness; import 
org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.search.suggest.SuggestBuilder; @@ -34,18 +35,18 @@ public CompletionSuggestionFuzzyBuilder(String name) { super(name, "completion"); } - private int fuzzyEditDistance = XFuzzySuggester.DEFAULT_MAX_EDITS; + private Fuzziness fuzziness = Fuzziness.ONE; private boolean fuzzyTranspositions = XFuzzySuggester.DEFAULT_TRANSPOSITIONS; private int fuzzyMinLength = XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH; private int fuzzyPrefixLength = XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX; private boolean unicodeAware = XFuzzySuggester.DEFAULT_UNICODE_AWARE; - public int getFuzzyEditDistance() { - return fuzzyEditDistance; + public Fuzziness getFuzziness() { + return fuzziness; } - public CompletionSuggestionFuzzyBuilder setFuzzyEditDistance(int fuzzyEditDistance) { - this.fuzzyEditDistance = fuzzyEditDistance; + public CompletionSuggestionFuzzyBuilder setFuzziness(Fuzziness fuzziness) { + this.fuzziness = fuzziness; return this; } @@ -89,8 +90,8 @@ public CompletionSuggestionFuzzyBuilder setUnicodeAware(boolean unicodeAware) { protected XContentBuilder innerToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { builder.startObject("fuzzy"); - if (fuzzyEditDistance != XFuzzySuggester.DEFAULT_MAX_EDITS) { - builder.field("edit_distance", fuzzyEditDistance); + if (fuzziness != Fuzziness.ONE) { + fuzziness.toXContent(builder, params); } if (fuzzyTranspositions != XFuzzySuggester.DEFAULT_TRANSPOSITIONS) { builder.field("transpositions", fuzzyTranspositions); diff --git a/src/test/java/org/elasticsearch/common/ParseFieldTests.java b/src/test/java/org/elasticsearch/common/ParseFieldTests.java new file mode 100644 index 0000000000000..d1dca3a482446 --- /dev/null +++ b/src/test/java/org/elasticsearch/common/ParseFieldTests.java @@ -0,0 +1,74 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.common; + +import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.test.ElasticsearchTestCase; + +import java.util.EnumSet; + +import static org.hamcrest.CoreMatchers.*; + +public class ParseFieldTests extends ElasticsearchTestCase { + + public void testParse() { + String[] values = new String[]{"foo_bar", "fooBar"}; + ParseField field = new ParseField(randomFrom(values)); + String[] deprecated = new String[]{"barFoo", "bar_foo"}; + ParseField withDepredcations = field.withDeprecation("Foobar", randomFrom(deprecated)); + assertThat(field, not(sameInstance(withDepredcations))); + assertThat(field.match(randomFrom(values), ParseField.EMPTY_FLAGS), is(true)); + assertThat(field.match("foo bar", ParseField.EMPTY_FLAGS), is(false)); + assertThat(field.match(randomFrom(deprecated), ParseField.EMPTY_FLAGS), is(false)); + assertThat(field.match("barFoo", ParseField.EMPTY_FLAGS), is(false)); + + + assertThat(withDepredcations.match(randomFrom(values), ParseField.EMPTY_FLAGS), is(true)); + assertThat(withDepredcations.match("foo bar", ParseField.EMPTY_FLAGS), is(false)); + assertThat(withDepredcations.match(randomFrom(deprecated), ParseField.EMPTY_FLAGS), is(true)); + assertThat(withDepredcations.match("barFoo", 
ParseField.EMPTY_FLAGS), is(true)); + + // now with strict mode + EnumSet flags = EnumSet.of(ParseField.Flag.STRICT); + assertThat(field.match(randomFrom(values), flags), is(true)); + assertThat(field.match("foo bar", flags), is(false)); + assertThat(field.match(randomFrom(deprecated), flags), is(false)); + assertThat(field.match("barFoo", flags), is(false)); + + + assertThat(withDepredcations.match(randomFrom(values), flags), is(true)); + assertThat(withDepredcations.match("foo bar", flags), is(false)); + try { + withDepredcations.match(randomFrom(deprecated), flags); + fail(); + } catch (ElasticsearchIllegalArgumentException ex) { + + } + + try { + withDepredcations.match("barFoo", flags); + fail(); + } catch (ElasticsearchIllegalArgumentException ex) { + + } + + + } + +} diff --git a/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java b/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java new file mode 100644 index 0000000000000..448d0522efb04 --- /dev/null +++ b/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java @@ -0,0 +1,199 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.common.unit; + +import org.elasticsearch.common.xcontent.XContent; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.test.ElasticsearchTestCase; +import org.junit.Test; + +import java.io.IOException; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.number.IsCloseTo.closeTo; + +public class FuzzinessTests extends ElasticsearchTestCase { + + @Test + public void testNumerics() { + String[] options = new String[]{"1.0", "1", "1.000000"}; + assertThat(Fuzziness.build(randomFrom(options)).asByte(), equalTo((byte) 1)); + assertThat(Fuzziness.build(randomFrom(options)).asInt(), equalTo(1)); + assertThat(Fuzziness.build(randomFrom(options)).asFloat(), equalTo(1f)); + assertThat(Fuzziness.build(randomFrom(options)).asDouble(), equalTo(1d)); + assertThat(Fuzziness.build(randomFrom(options)).asLong(), equalTo(1l)); + assertThat(Fuzziness.build(randomFrom(options)).asShort(), equalTo((short) 1)); + } + + @Test + public void testParseFromXContent() throws IOException { + final int iters = atLeast(10); + for (int i = 0; i < iters; i++) { + { + XContent xcontent = XContentType.JSON.xContent(); + float floatValue = randomFloat(); + String json = jsonBuilder().startObject() + .field(Fuzziness.X_FIELD_NAME, floatValue) + .endObject().string(); + XContentParser parser = xcontent.createParser(json); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + Fuzziness parse = Fuzziness.parse(parser); + assertThat(parse.asFloat(), equalTo(floatValue)); + assertThat(parse.asDouble(), closeTo((double) floatValue, 0.000001)); + assertThat(parser.nextToken(), 
equalTo(XContentParser.Token.END_OBJECT)); + } + + { + XContent xcontent = XContentType.JSON.xContent(); + Integer intValue = frequently() ? randomIntBetween(0, 2) : randomIntBetween(0, 100); + Float floatRep = randomFloat(); + Number value = intValue; + if (randomBoolean()) { + value = new Float(floatRep += intValue); + } + String json = jsonBuilder().startObject() + .field(Fuzziness.X_FIELD_NAME, randomBoolean() ? value.toString() : value) + .endObject().string(); + XContentParser parser = xcontent.createParser(json); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(parser.nextToken(), anyOf(equalTo(XContentParser.Token.VALUE_NUMBER), equalTo(XContentParser.Token.VALUE_STRING))); + Fuzziness parse = Fuzziness.parse(parser); + assertThat(parse.asInt(), equalTo(intValue)); + assertThat((int) parse.asShort(), equalTo(intValue)); + assertThat((int) parse.asByte(), equalTo(intValue)); + assertThat(parse.asLong(), equalTo((long) intValue)); + if (value.intValue() >= 1) { + assertThat(parse.asDistance(), equalTo(Math.min(2, intValue))); + } + assertThat(parser.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + if (intValue.equals(value)) { + switch (intValue) { + case 1: + assertThat(parse, sameInstance(Fuzziness.ONE)); + break; + case 2: + assertThat(parse, sameInstance(Fuzziness.TWO)); + break; + case 0: + assertThat(parse, sameInstance(Fuzziness.ZERO)); + break; + default: + break; + } + } + } + { + XContent xcontent = XContentType.JSON.xContent(); + String json = jsonBuilder().startObject() + .field(Fuzziness.X_FIELD_NAME, randomBoolean() ? 
"AUTO" : "auto") + .endObject().string(); + if (randomBoolean()) { + json = Fuzziness.AUTO.toXContent(jsonBuilder().startObject(), null).endObject().string(); + } + XContentParser parser = xcontent.createParser(json); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); + Fuzziness parse = Fuzziness.parse(parser); + assertThat(parse, sameInstance(Fuzziness.AUTO)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + { + String[] values = new String[]{"d", "H", "ms", "s", "S", "w"}; + String actual = randomIntBetween(1, 3) + randomFrom(values); + XContent xcontent = XContentType.JSON.xContent(); + String json = jsonBuilder().startObject() + .field(Fuzziness.X_FIELD_NAME, actual) + .endObject().string(); + XContentParser parser = xcontent.createParser(json); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); + Fuzziness parse = Fuzziness.parse(parser); + assertThat(parse.asTimeValue(), equalTo(TimeValue.parseTimeValue(actual, null))); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + } + + } + + @Test + public void testAuto() { + final int codePoints = randomIntBetween(0, 10); + String string = randomRealisticUnicodeOfCodepointLength(codePoints); + if (codePoints <= 2) { + assertThat(Fuzziness.AUTO.asDistance(string), equalTo(0)); + assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(0)); + } else if (codePoints > 5) { + assertThat(Fuzziness.AUTO.asDistance(string), equalTo(2)); + assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(2)); + } else { + 
assertThat(Fuzziness.AUTO.asDistance(string), equalTo(1)); + assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(1)); + } + assertThat(Fuzziness.AUTO.asByte(), equalTo((byte) 1)); + assertThat(Fuzziness.AUTO.asInt(), equalTo(1)); + assertThat(Fuzziness.AUTO.asFloat(), equalTo(1f)); + assertThat(Fuzziness.AUTO.asDouble(), equalTo(1d)); + assertThat(Fuzziness.AUTO.asLong(), equalTo(1l)); + assertThat(Fuzziness.AUTO.asShort(), equalTo((short) 1)); + assertThat(Fuzziness.AUTO.asTimeValue(), equalTo(TimeValue.parseTimeValue("1", TimeValue.timeValueMillis(1)))); + + } + + @Test + public void testAsDistance() { + final int iters = atLeast(10); + for (int i = 0; i < iters; i++) { + Integer integer = Integer.valueOf(randomIntBetween(0, 10)); + String value = "" + (randomBoolean() ? integer.intValue() : integer.floatValue()); + assertThat(Fuzziness.build(value).asDistance(), equalTo(Math.min(2, integer.intValue()))); + } + } + + @Test + public void testSimilarityToDistance() { + assertThat(Fuzziness.fromSimilarity(0.5f).asDistance("ab"), equalTo(1)); + assertThat(Fuzziness.fromSimilarity(0.66f).asDistance("abcefg"), equalTo(2)); + assertThat(Fuzziness.fromSimilarity(0.8f).asDistance("ab"), equalTo(0)); + assertThat(Fuzziness.fromSimilarity(0.8f).asDistance("abcefg"), equalTo(1)); + assertThat((double) Fuzziness.ONE.asSimilarity("abcefg"), closeTo(0.8f, 0.05)); + assertThat((double) Fuzziness.TWO.asSimilarity("abcefg"), closeTo(0.66f, 0.05)); + assertThat((double) Fuzziness.ONE.asSimilarity("ab"), closeTo(0.5f, 0.05)); + + int iters = atLeast(100); + for (int i = 0; i < iters; i++) { + Fuzziness fuzziness = Fuzziness.fromEdits(between(1, 2)); + String string = rarely() ? 
randomRealisticUnicodeOfLengthBetween(2, 4) : + randomRealisticUnicodeOfLengthBetween(4, 10); + float similarity = fuzziness.asSimilarity(string); + if (similarity != 0.0f) { + Fuzziness similarityBased = Fuzziness.build(similarity); + assertThat((double) similarityBased.asSimilarity(string), closeTo(similarity, 0.05)); + assertThat(similarityBased.asDistance(string), equalTo(Math.min(2, fuzziness.asDistance(string)))); + } + } + } +} diff --git a/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java index 3eeaf702f6dcb..7da272676bade 100644 --- a/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java @@ -43,6 +43,7 @@ import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsModule; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexNameModule; import org.elasticsearch.index.analysis.AnalysisModule; @@ -432,7 +433,7 @@ public void testFuzzyQuery() throws IOException { @Test public void testFuzzyQueryWithFieldsBuilder() throws IOException { IndexQueryParserService queryParser = queryParser(); - Query parsedQuery = queryParser.parse(fuzzyQuery("name.first", "sh").minSimilarity(0.1f).prefixLength(1).boost(2.0f).buildAsBytes()).query(); + Query parsedQuery = queryParser.parse(fuzzyQuery("name.first", "sh").fuzziness(Fuzziness.fromSimilarity(0.1f)).prefixLength(1).boost(2.0f).buildAsBytes()).query(); assertThat(parsedQuery, instanceOf(FuzzyQuery.class)); FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery; assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh"))); diff --git a/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json 
b/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json index 6ef343ac4549f..3e3d30ffdc0f7 100644 --- a/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json +++ b/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json @@ -2,9 +2,9 @@ "fuzzy":{ "name.first":{ "value":"sh", - "min_similarity":0.1, + "fuzziness":0.1, "prefix_length":1, "boost":2.0 } } -} \ No newline at end of file +} diff --git a/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields2.json b/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields2.json index 371070f66c22b..095ecc6341d7f 100644 --- a/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields2.json +++ b/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields2.json @@ -2,8 +2,8 @@ "fuzzy":{ "age":{ "value":12, - "min_similarity":5, + "fuzziness":5, "boost":2.0 } } -} \ No newline at end of file +} diff --git a/src/test/java/org/elasticsearch/index/query/span-multi-term-fuzzy-range.json b/src/test/java/org/elasticsearch/index/query/span-multi-term-fuzzy-range.json index 4679170c85828..d9ca05b3f3ecd 100644 --- a/src/test/java/org/elasticsearch/index/query/span-multi-term-fuzzy-range.json +++ b/src/test/java/org/elasticsearch/index/query/span-multi-term-fuzzy-range.json @@ -4,7 +4,7 @@ "fuzzy":{ "age":{ "value":12, - "min_similarity":5, + "fuzziness":5, "boost":2.0 } } diff --git a/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java b/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java index a600bbcde4805..e80634a467ea5 100644 --- a/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java @@ -34,6 +34,7 @@ import org.elasticsearch.client.Requests; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.MapperException; import org.elasticsearch.index.mapper.MapperParsingException; @@ -502,7 +503,7 @@ public void testThatFuzzySuggesterSupportsEditDistances() throws Exception { // edit distance 2 suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Norw").size(10).setFuzzyEditDistance(2) + new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Norw").size(10).setFuzziness(Fuzziness.TWO) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); } @@ -520,12 +521,12 @@ public void testThatFuzzySuggesterSupportsTranspositions() throws Exception { refresh(); SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(false).setFuzzyEditDistance(1) + new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(false).setFuzziness(Fuzziness.ONE) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo"); suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(true).setFuzzyEditDistance(1) + new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(true).setFuzziness(Fuzziness.ONE) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); } @@ -601,7 +602,7 @@ public void testThatFuzzySuggesterIsUnicodeAware() throws Exception { assertSuggestions(suggestResponse, false, "foo"); // increasing edit distance instead of unicode awareness works again, as this is only a single character - completionSuggestionBuilder.setFuzzyEditDistance(2); + completionSuggestionBuilder.setFuzziness(Fuzziness.TWO); suggestResponse = 
client().prepareSuggest(INDEX).addSuggestion(completionSuggestionBuilder).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "ööööö"); }