Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solr textContainsPhrase & Solr Client Tokenizer Alignment #4166

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -228,13 +228,16 @@ private void storeTest(String... stores) throws Exception {
true);
final Multimap<String, Object> doc3 = getDocument("Hello Bob, are you there?", -500, 10.1, Geoshape.point(47.0, 10.0), Geoshape.box(46.9, 9.9, 47.1, 10.1), Arrays.asList("7", "8", "9"), Sets.newHashSet("7", "8"), Instant.ofEpochSecond(3),
false);
final Multimap<String, Object> doc4 = getDocument("foo.com bar/test", -1001, 2, Geoshape.point(0, 0.0), Geoshape.box(46.6, 0, 46.9, 0.1), Arrays.asList("10", "11", "12"), Sets.newHashSet("9", "10"), Instant.ofEpochSecond(0),
false);

for (final String store : stores) {
initialize(store);

add(store, "doc1", doc1, true);
add(store, "doc2", doc2, true);
add(store, "doc3", doc3, false);
add(store, "doc3", doc3, true);
add(store, "doc4", doc4, false);

}

Expand Down Expand Up @@ -262,23 +265,24 @@ private void storeTest(String... stores) throws Exception {
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "worl"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "Tomorrow world"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "WorLD HELLO"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "foo.com"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS_FUZZY, "boby"))).count());

assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "A"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "A"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "z"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "world"))).count());

assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "A"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "A"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "z"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "world"))).count());

assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "A"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "z"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "world"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "z"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "world"))).count());

assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "A"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "z"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "world"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "z"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "world"))).count());

//Ordering
result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "world"), orderTimeDesc))
Expand Down Expand Up @@ -357,25 +361,25 @@ private void storeTest(String... stores) throws Exception {
//String
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.EQUAL, "Tomorrow is the world"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.EQUAL, "world"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.NOT_EQUAL, "bob"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.NOT_EQUAL, "bob"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.PREFIX, "Tomorrow"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.PREFIX, "wor"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.FUZZY, "Tomorow is the world"))).count());

assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "A"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "A"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "z"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "Hello world"))).count());
assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "Hello world"))).count());

assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "A"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "A"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "z"))).count());
assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "Hello world"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "Hello world"))).count());

assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "A"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "z"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "z"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "Hello world"))).count());

assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "A"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "z"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "z"))).count());
assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "Hello world"))).count());

try {
Expand Down Expand Up @@ -413,8 +417,7 @@ private void storeTest(String... stores) throws Exception {
assertEquals(2, result.size());

result = tx.queryStream(new IndexQuery(store, Not.of(PredicateCondition.of(TEXT, Text.CONTAINS, "world")))).collect(Collectors.toList());
assertEquals(1, result.size());
assertEquals("doc3", result.get(0));
assertEquals(ImmutableSet.of("doc3", "doc4"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, And.of(PredicateCondition.of(TIME, Cmp.EQUAL, -500), Not.of(PredicateCondition.of(TEXT, Text.CONTAINS, "world"))))).collect(Collectors.toList());
assertEquals(1, result.size());
Expand Down Expand Up @@ -449,8 +452,8 @@ private void storeTest(String... stores) throws Exception {
assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.box(46.5, -0.5, 50.5, 10.5)))).collect(Collectors.toList());
assertEquals(3,result.size());
assertEquals(ImmutableSet.of("doc1", "doc2", "doc3"), ImmutableSet.copyOf(result));
assertEquals(4, result.size());
assertEquals(ImmutableSet.of("doc1", "doc2", "doc3", "doc4"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00)))).collect(Collectors.toList());
assertEquals(2, result.size());
Expand All @@ -471,8 +474,8 @@ private void storeTest(String... stores) throws Exception {

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.DISJOINT, Geoshape.polygon(Arrays.asList(new double[][]
{{-5.0,47.0},{5.0,47.0},{5.0,50.0},{-5.0,50.0},{-5.0,47.0}}))))).collect(Collectors.toList());
assertEquals(1, result.size());
assertEquals(ImmutableSet.of("doc3"), ImmutableSet.copyOf(result));
assertEquals(2, result.size());
assertEquals(ImmutableSet.of("doc3", "doc4"), ImmutableSet.copyOf(result));
}

if (indexFeatures.supportsGeoContains()) {
Expand All @@ -486,8 +489,8 @@ private void storeTest(String... stores) throws Exception {
assertEquals(ImmutableSet.of("doc1","doc2"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.circle(48.5, 0.5, 200.00)))).collect(Collectors.toList());
assertEquals(2, result.size());
assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result));
assertEquals(3, result.size());
assertEquals(ImmutableSet.of("doc1", "doc2", "doc4"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.polygon(Arrays.asList(new double[][] {{-1.0,48.0},{2.0,48.0},{2.0,49.0},{-1.0,49.0},{-1.0,48.0}}))))).collect(Collectors.toList());
assertEquals(2, result.size());
Expand Down Expand Up @@ -516,13 +519,13 @@ private void storeTest(String... stores) throws Exception {
assertEquals(2, tx.queryStream(new RawQuery(store,"text:\"world\"",NO_PARAS)).count());
assertEquals(2, tx.queryStream(new RawQuery(store,"time:[1000 TO 1020]",NO_PARAS)).count());
assertEquals(2, tx.queryStream(new RawQuery(store,"time:[1000 TO *]",NO_PARAS)).count());
assertEquals(3, tx.queryStream(new RawQuery(store,"time:[* TO *]",NO_PARAS)).count());
assertEquals(4, tx.queryStream(new RawQuery(store,"time:[* TO *]",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store,"weight:[5.1 TO 8.3]",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store,"weight:5.2",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store,"text:world AND time:1001",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store,"name:\"Hello world\"",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store, "boolean:true", NO_PARAS)).count());
assertEquals(2, tx.queryStream(new RawQuery(store, "boolean:false", NO_PARAS)).count());
assertEquals(3, tx.queryStream(new RawQuery(store, "boolean:false", NO_PARAS)).count());
assertEquals(2, tx.queryStream(new RawQuery(store, "date:{1970-01-01T00:00:01Z TO 1970-01-01T00:00:03Z]", NO_PARAS)).count());
assertEquals(3, tx.queryStream(new RawQuery(store, "date:[1970-01-01T00:00:01Z TO *]", NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store, "date:\"1970-01-01T00:00:02Z\"", NO_PARAS)).count());
Expand Down Expand Up @@ -558,9 +561,9 @@ private void storeTest(String... stores) throws Exception {
assertEquals("doc3", tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.EQUAL, Instant.ofEpochSecond(3)))).findFirst().get());
assertEquals("doc3", tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.GREATER_THAN, Instant.ofEpochSecond(2)))).findFirst().get());
assertEquals(ImmutableSet.of("doc2", "doc3"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.GREATER_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc2"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc3"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.NOT_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc2", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc3", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.NOT_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));


//Update some data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
import org.apache.http.impl.auth.KerberosScheme;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.solr.client.solrj.SolrClient;
Expand Down Expand Up @@ -95,6 +98,7 @@
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.lang.reflect.Constructor;
import java.text.DateFormat;
Expand Down Expand Up @@ -862,7 +866,8 @@
return tokenize(ParameterType.TEXT_ANALYZER, information, value, key, predicate);
} else if (predicate == Text.PREFIX || predicate == Text.CONTAINS_PREFIX
|| predicate == Text.REGEX || predicate == Text.CONTAINS_REGEX
|| predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY) {
|| predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY
|| predicate == Text.CONTAINS_PHRASE) {
return buildQueryFilterStringValue(key, (String) value, predicate, information);
} else if (predicate == Cmp.LESS_THAN || predicate == Cmp.LESS_THAN_EQUAL
|| predicate == Cmp.GREATER_THAN || predicate == Cmp.GREATER_THAN_EQUAL) {
Expand Down Expand Up @@ -991,6 +996,8 @@
return (stringKey + ":" + escapeValue(value) + "*");
} else if (predicate == Text.CONTAINS_PREFIX) {
return (key + ":" + escapeValue(value) + "*");
} else if (predicate == Text.CONTAINS_PHRASE) {
return (key + ":\"" + escapeValue(value) + "\"");
} else if (predicate == Text.REGEX) {
return (stringKey + ":/" + value + "/");
} else if (predicate == Text.CONTAINS_REGEX) {
Expand Down Expand Up @@ -1027,7 +1034,8 @@
if (analyzer != null) {
terms = customTokenize(analyzer, key, (String) value);
} else if (parameterType == ParameterType.TEXT_ANALYZER) {
terms = Text.tokenize((String) value);
//If a custom tokenizer was not specified, assume the standard one as defined in the default Solr Configset
terms = standardTokenizer((String) value);
} else {
return buildQueryFilterStringValue(key, (String) value, janusgraphPredicate, information);
}
Expand Down Expand Up @@ -1162,13 +1170,15 @@
case DEFAULT:
case TEXT:
return predicate == Text.CONTAINS || predicate == Text.CONTAINS_PREFIX
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY;
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY
|| predicate == Text.CONTAINS_PHRASE;
case STRING:
return predicate instanceof Cmp || predicate==Text.REGEX || predicate==Text.PREFIX || predicate == Text.FUZZY;
case TEXTSTRING:
return predicate instanceof Cmp || predicate == Text.REGEX || predicate == Text.PREFIX || predicate == Text.FUZZY
|| predicate == Text.CONTAINS || predicate == Text.CONTAINS_PREFIX
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY;
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY
|| predicate == Text.CONTAINS_PHRASE;
}
} else if (dataType == Date.class || dataType == Instant.class) {
return predicate instanceof Cmp;
Expand Down Expand Up @@ -1267,6 +1277,20 @@
/*
################# UTILITY METHODS #######################
*/
static List<String> standardTokenizer(String text) {
List<String> result = new ArrayList<>();
try (Tokenizer tokenizer = new StandardTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY)) {
tokenizer.setReader(new StringReader(text));
CharTermAttribute attr = tokenizer.addAttribute(CharTermAttribute.class);
tokenizer.reset();
while (tokenizer.incrementToken()) {
result.add(attr.toString());
}
return result;
} catch (IOException e) {
throw new UncheckedIOException(e);

Check warning on line 1291 in janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java

View check run for this annotation

Codecov / codecov/patch

janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java#L1290-L1291

Added lines #L1290 - L1291 were not covered by tests
}
}

static Optional<String> getDualFieldName(String fieldKey, KeyInformation ki) {
if (AttributeUtils.isString(ki.getDataType()) && Mapping.getMapping(ki) == Mapping.TEXTSTRING) {
Expand Down