Skip to content

Commit

Permalink
Don't apply min frequency smoothing if suggest type is 'always'
Browse files Browse the repository at this point in the history
Using an automatically detected 'min_doc_freq' when the suggest type is set to
'always' is counterintuitive. If the suggest type is 'always', ignore the
frequency and set the threshold frequency to 0 so that all possible candidates
can be drawn, as long as they are within the given bounds.

Closes #3037
  • Loading branch information
s1monw committed May 15, 2013
1 parent 48cb06c commit 8235b89
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 7 deletions.
Expand Up @@ -120,7 +120,7 @@ public CandidateSet drawCandidates(CandidateSet set) throws IOException {
Candidate original = set.originalTerm;
BytesRef term = preFilter(original.term, spare, byteSpare);
final long frequency = original.frequency;
spellchecker.setThresholdFrequency(thresholdFrequency(frequency, dictSize));
spellchecker.setThresholdFrequency(this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(frequency, dictSize));
SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
List<Candidate> candidates = new ArrayList<Candidate>(suggestSimilar.length);
for (int i = 0; i < suggestSimilar.length; i++) {
Expand Down Expand Up @@ -175,7 +175,7 @@ private double score(long frequency, double errorScore, long dictionarySize) {

protected long thresholdFrequency(long termFrequency, long dictionarySize) {
if (termFrequency > 0) {
return (long) Math.round(termFrequency * (Math.log10(termFrequency - frequencyPlateau) * (1.0 / Math.log10(logBase))) + 1);
return (long) Math.max(0, Math.round(termFrequency * (Math.log10(termFrequency - frequencyPlateau) * (1.0 / Math.log10(logBase))) + 1));
}
return 0;

Expand Down
Expand Up @@ -25,6 +25,7 @@

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.suggest.Suggest;
import org.hamcrest.Matcher;

/**
Expand Down Expand Up @@ -73,6 +74,24 @@ public static void assertHighlight(SearchResponse resp, int hit, String field, i
assertThat(resp.getHits().hits()[hit].getHighlightFields().get(field).fragments().length, greaterThan(fragment));
assertThat(resp.getHits().hits()[hit].highlightFields().get(field).fragments()[fragment].string(), matcher);
}

/**
 * Asserts that the suggestion registered under {@code key} has exactly {@code size}
 * options in its entry at index {@code entry}.
 *
 * @param searchSuggest the suggest result to inspect; must not be {@code null}
 * @param entry         zero-based index of the entry to inspect
 * @param size          the exact number of options expected in that entry
 * @param key           the name of the suggestion to look up
 */
public static void assertSuggestionSize(Suggest searchSuggest, int entry, int size, String key) {
    assertThat(searchSuggest, notNullValue());
    assertThat(searchSuggest.size(), greaterThanOrEqualTo(1));
    assertThat(searchSuggest.getSuggestion(key).getName(), equalTo(key));
    // The entry list must be strictly longer than the index we are about to read; otherwise
    // get(entry) would fail with an IndexOutOfBoundsException instead of an assertion error.
    assertThat(searchSuggest.getSuggestion(key).getEntries().size(), greaterThan(entry));
    assertThat(searchSuggest.getSuggestion(key).getEntries().get(entry).getOptions().size(), equalTo(size));
}

/**
 * Asserts that the option at index {@code ord} of the entry at index {@code entry}, within the
 * suggestion registered under {@code key}, has the text {@code text}.
 *
 * @param searchSuggest the suggest result to inspect; must not be {@code null}
 * @param entry         zero-based index of the entry to inspect
 * @param ord           zero-based index of the option within that entry
 * @param key           the name of the suggestion to look up
 * @param text          the expected text of the selected option
 */
public static void assertSuggestion(Suggest searchSuggest, int entry, int ord, String key, String text) {
    assertThat(searchSuggest, notNullValue());
    assertThat(searchSuggest.size(), greaterThanOrEqualTo(1));
    assertThat(searchSuggest.getSuggestion(key).getName(), equalTo(key));
    // The entry list must be strictly longer than the index we are about to read; otherwise
    // get(entry) would fail with an IndexOutOfBoundsException instead of an assertion error.
    assertThat(searchSuggest.getSuggestion(key).getEntries().size(), greaterThan(entry));
    assertThat(searchSuggest.getSuggestion(key).getEntries().get(entry).getOptions().size(), greaterThan(ord));
    assertThat(searchSuggest.getSuggestion(key).getEntries().get(entry).getOptions().get(ord).getText().string(), equalTo(text));
}

/*
* matchers
Expand Down
Expand Up @@ -29,11 +29,11 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

Expand All @@ -42,18 +42,15 @@
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.action.suggest.SuggestRequestBuilder;
import org.elasticsearch.action.suggest.SuggestResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.ImmutableSettings.Builder;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilder.SuggestionBuilder;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
Expand Down Expand Up @@ -82,6 +79,60 @@ protected Client getClient() {
return client("server1");
}

@Test // see #3037
public void testSuggestModes() throws IOException {
    // Index settings: one shard, no replicas, plus a "biword" analyzer that shingles
    // standard tokens into 2- and 3-grams.
    Builder settings = ImmutableSettings.builder()
            .put("index.number_of_shards", 1)
            .put("index.number_of_replicas", 0)
            .put("index.analysis.analyzer.biword.tokenizer", "standard")
            .putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase")
            .put("index.analysis.filter.shingler.type", "shingle")
            .put("index.analysis.filter.shingler.min_shingle_size", 2)
            .put("index.analysis.filter.shingler.max_shingle_size", 3);

    // "name" is a multi_field: a plain string sub-field and a shingled sub-field.
    XContentBuilder mapping = XContentFactory.jsonBuilder()
            .startObject()
                .startObject("type1")
                    .startObject("properties")
                        .startObject("name")
                            .field("type", "multi_field")
                            .field("path", "just_name")
                            .startObject("fields")
                                .startObject("name")
                                    .field("type", "string")
                                .endObject()
                                .startObject("name_shingled")
                                    .field("type", "string")
                                    .field("index_analyzer", "biword")
                                    .field("search_analyzer", "standard")
                                .endObject()
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();

    // Start from a clean slate, then create the index and wait for it to become green.
    client.admin().indices().prepareDelete().execute().actionGet();
    client.admin().indices().prepareCreate("test").setSettings(settings.build()).addMapping("type1", mapping).execute().actionGet();
    client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();

    // Index the three documents in the same order as before, then make them searchable.
    String[] names = {"I like iced tea", "I like tea.", "I like ice cream."};
    for (String name : names) {
        client.prepareIndex("test", "type1")
                .setSource(XContentFactory.jsonBuilder().startObject().field("name", name).endObject())
                .execute().actionGet();
    }
    client.admin().indices().prepareRefresh().execute().actionGet();

    // With suggest mode "always" the first option of the first entry is expected to be "iced tea".
    Suggest alwaysSuggest = searchSuggest(
            client,
            "ice tea",
            phraseSuggestion("did_you_mean").field("name_shingled")
                    .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
                    .gramSize(3));
    ElasticsearchAssertions.assertSuggestion(alwaysSuggest, 0, 0, "did_you_mean", "iced tea");

    // Without an explicit suggest mode the same request is expected to yield no options.
    Suggest defaultSuggest = searchSuggest(
            client,
            "ice tea",
            phraseSuggestion("did_you_mean").field("name_shingled")
                    .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).maxEdits(2))
                    .gramSize(3));
    assertSuggestionSize(defaultSuggest, 0, 0, "did_you_mean");
}

@Test // see #2729
public void testSizeOneShard() throws Exception {
client.admin().indices().prepareDelete().execute().actionGet();
Expand Down

0 comments on commit 8235b89

Please sign in to comment.