From cd4bad135c0e6a12d9359018cf89a86627b8c54a Mon Sep 17 00:00:00 2001 From: Allan Clements Date: Sat, 2 Dec 2023 17:26:47 -0600 Subject: [PATCH] Fixes #4165 Closes #4164 Added new test document and updated assertions Use tokenizer that mimics Solr's standardized tokenizer Geo predicate tweaking Implemented TextContainsPhrase for Solr Signed-off-by: Allan Clements --- .../indexing/IndexProviderTest.java | 57 ++++++++++--------- .../diskstorage/solr/SolrIndex.java | 32 +++++++++-- .../diskstorage/solr/SolrIndexTest.java | 1 + .../solr/StandardTokenizerTest.java | 32 +++++++++++ 4 files changed, 91 insertions(+), 31 deletions(-) create mode 100644 janusgraph-solr/src/test/java/org/janusgraph/diskstorage/solr/StandardTokenizerTest.java diff --git a/janusgraph-backend-testutils/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java b/janusgraph-backend-testutils/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java index b39c5f1e69..2e3485afbf 100644 --- a/janusgraph-backend-testutils/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java +++ b/janusgraph-backend-testutils/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java @@ -228,13 +228,16 @@ private void storeTest(String... 
stores) throws Exception { true); final Multimap doc3 = getDocument("Hello Bob, are you there?", -500, 10.1, Geoshape.point(47.0, 10.0), Geoshape.box(46.9, 9.9, 47.1, 10.1), Arrays.asList("7", "8", "9"), Sets.newHashSet("7", "8"), Instant.ofEpochSecond(3), false); + final Multimap doc4 = getDocument("foo.com bar/test", -1001, 2, Geoshape.point(0, 0.0), Geoshape.box(46.6, 0, 46.9, 0.1), Arrays.asList("10", "11", "12"), Sets.newHashSet("9", "10"), Instant.ofEpochSecond(0), + false); for (final String store : stores) { initialize(store); add(store, "doc1", doc1, true); add(store, "doc2", doc2, true); - add(store, "doc3", doc3, false); + add(store, "doc3", doc3, true); + add(store, "doc4", doc4, false); } @@ -262,23 +265,24 @@ private void storeTest(String... stores) throws Exception { assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "worl"))).count()); assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "Tomorrow world"))).count()); assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "WorLD HELLO"))).count()); + assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "foo.com"))).count()); assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS_FUZZY, "boby"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "A"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "A"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "z"))).count()); assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "world"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "A"))).count()); + 
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "A"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "z"))).count()); assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "world"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "A"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "z"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "world"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "z"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "world"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "A"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "z"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "world"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "z"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "world"))).count()); //Ordering result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "world"), orderTimeDesc)) @@ -357,25 +361,25 @@ private void storeTest(String... 
stores) throws Exception { //String assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.EQUAL, "Tomorrow is the world"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.EQUAL, "world"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.NOT_EQUAL, "bob"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.NOT_EQUAL, "bob"))).count()); assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.PREFIX, "Tomorrow"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.PREFIX, "wor"))).count()); assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.FUZZY, "Tomorow is the world"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "A"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "A"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "z"))).count()); - assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "Hello world"))).count()); + assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "Hello world"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "A"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "A"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "z"))).count()); - assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "Hello world"))).count()); + 
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "Hello world"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "A"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "z"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "z"))).count()); assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "Hello world"))).count()); assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "A"))).count()); - assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "z"))).count()); + assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "z"))).count()); assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "Hello world"))).count()); try { @@ -413,8 +417,7 @@ private void storeTest(String... stores) throws Exception { assertEquals(2, result.size()); result = tx.queryStream(new IndexQuery(store, Not.of(PredicateCondition.of(TEXT, Text.CONTAINS, "world")))).collect(Collectors.toList()); - assertEquals(1, result.size()); - assertEquals("doc3", result.get(0)); + assertEquals(ImmutableSet.of("doc3", "doc4"), ImmutableSet.copyOf(result)); result = tx.queryStream(new IndexQuery(store, And.of(PredicateCondition.of(TIME, Cmp.EQUAL, -500), Not.of(PredicateCondition.of(TEXT, Text.CONTAINS, "world"))))).collect(Collectors.toList()); assertEquals(1, result.size()); @@ -449,8 +452,8 @@ private void storeTest(String... 
stores) throws Exception { assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result)); result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.box(46.5, -0.5, 50.5, 10.5)))).collect(Collectors.toList()); - assertEquals(3,result.size()); - assertEquals(ImmutableSet.of("doc1", "doc2", "doc3"), ImmutableSet.copyOf(result)); + assertEquals(4, result.size()); + assertEquals(ImmutableSet.of("doc1", "doc2", "doc3", "doc4"), ImmutableSet.copyOf(result)); result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00)))).collect(Collectors.toList()); assertEquals(2, result.size()); @@ -471,8 +474,8 @@ private void storeTest(String... stores) throws Exception { result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.DISJOINT, Geoshape.polygon(Arrays.asList(new double[][] {{-5.0,47.0},{5.0,47.0},{5.0,50.0},{-5.0,50.0},{-5.0,47.0}}))))).collect(Collectors.toList()); - assertEquals(1, result.size()); - assertEquals(ImmutableSet.of("doc3"), ImmutableSet.copyOf(result)); + assertEquals(2, result.size()); + assertEquals(ImmutableSet.of("doc3", "doc4"), ImmutableSet.copyOf(result)); } if (indexFeatures.supportsGeoContains()) { @@ -486,8 +489,8 @@ private void storeTest(String... 
stores) throws Exception { assertEquals(ImmutableSet.of("doc1","doc2"), ImmutableSet.copyOf(result)); result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.circle(48.5, 0.5, 200.00)))).collect(Collectors.toList()); - assertEquals(2, result.size()); - assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result)); + assertEquals(3, result.size()); + assertEquals(ImmutableSet.of("doc1", "doc2", "doc4"), ImmutableSet.copyOf(result)); result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.polygon(Arrays.asList(new double[][] {{-1.0,48.0},{2.0,48.0},{2.0,49.0},{-1.0,49.0},{-1.0,48.0}}))))).collect(Collectors.toList()); assertEquals(2, result.size()); @@ -516,13 +519,13 @@ private void storeTest(String... stores) throws Exception { assertEquals(2, tx.queryStream(new RawQuery(store,"text:\"world\"",NO_PARAS)).count()); assertEquals(2, tx.queryStream(new RawQuery(store,"time:[1000 TO 1020]",NO_PARAS)).count()); assertEquals(2, tx.queryStream(new RawQuery(store,"time:[1000 TO *]",NO_PARAS)).count()); - assertEquals(3, tx.queryStream(new RawQuery(store,"time:[* TO *]",NO_PARAS)).count()); + assertEquals(4, tx.queryStream(new RawQuery(store,"time:[* TO *]",NO_PARAS)).count()); assertEquals(1, tx.queryStream(new RawQuery(store,"weight:[5.1 TO 8.3]",NO_PARAS)).count()); assertEquals(1, tx.queryStream(new RawQuery(store,"weight:5.2",NO_PARAS)).count()); assertEquals(1, tx.queryStream(new RawQuery(store,"text:world AND time:1001",NO_PARAS)).count()); assertEquals(1, tx.queryStream(new RawQuery(store,"name:\"Hello world\"",NO_PARAS)).count()); assertEquals(1, tx.queryStream(new RawQuery(store, "boolean:true", NO_PARAS)).count()); - assertEquals(2, tx.queryStream(new RawQuery(store, "boolean:false", NO_PARAS)).count()); + assertEquals(3, tx.queryStream(new RawQuery(store, "boolean:false", NO_PARAS)).count()); assertEquals(2, tx.queryStream(new RawQuery(store, 
"date:{1970-01-01T00:00:01Z TO 1970-01-01T00:00:03Z]", NO_PARAS)).count()); assertEquals(3, tx.queryStream(new RawQuery(store, "date:[1970-01-01T00:00:01Z TO *]", NO_PARAS)).count()); assertEquals(1, tx.queryStream(new RawQuery(store, "date:\"1970-01-01T00:00:02Z\"", NO_PARAS)).count()); @@ -558,9 +561,9 @@ private void storeTest(String... stores) throws Exception { assertEquals("doc3", tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.EQUAL, Instant.ofEpochSecond(3)))).findFirst().get()); assertEquals("doc3", tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.GREATER_THAN, Instant.ofEpochSecond(2)))).findFirst().get()); assertEquals(ImmutableSet.of("doc2", "doc3"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.GREATER_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet())); - assertEquals(ImmutableSet.of("doc1"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN, Instant.ofEpochSecond(2)))).collect(Collectors.toSet())); - assertEquals(ImmutableSet.of("doc1", "doc2"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet())); - assertEquals(ImmutableSet.of("doc1", "doc3"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.NOT_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet())); + assertEquals(ImmutableSet.of("doc1", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN, Instant.ofEpochSecond(2)))).collect(Collectors.toSet())); + assertEquals(ImmutableSet.of("doc1", "doc2", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet())); + assertEquals(ImmutableSet.of("doc1", "doc3", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.NOT_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet())); 
//Update some data diff --git a/janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java b/janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java index 34b45ce7c0..00c60e2fd6 100644 --- a/janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java +++ b/janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java @@ -25,7 +25,10 @@ import org.apache.http.impl.auth.KerberosScheme; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.solr.client.solrj.SolrClient; @@ -95,6 +98,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.StringReader; import java.io.UncheckedIOException; import java.lang.reflect.Constructor; import java.text.DateFormat; @@ -862,7 +866,8 @@ public String buildQueryFilter(Condition condition, KeyInform return tokenize(ParameterType.TEXT_ANALYZER, information, value, key, predicate); } else if (predicate == Text.PREFIX || predicate == Text.CONTAINS_PREFIX || predicate == Text.REGEX || predicate == Text.CONTAINS_REGEX - || predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY) { + || predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY + || predicate == Text.CONTAINS_PHRASE) { return buildQueryFilterStringValue(key, (String) value, predicate, information); } else if (predicate == Cmp.LESS_THAN || predicate == Cmp.LESS_THAN_EQUAL || predicate == Cmp.GREATER_THAN || predicate == Cmp.GREATER_THAN_EQUAL) { @@ -991,6 +996,8 @@ public String buildQueryFilterStringValue(String key, String value, JanusGraphPr return 
(stringKey + ":" + escapeValue(value) + "*"); } else if (predicate == Text.CONTAINS_PREFIX) { return (key + ":" + escapeValue(value) + "*"); + } else if (predicate == Text.CONTAINS_PHRASE) { + return (key + ":\"" + escapeValue(value) + "\""); } else if (predicate == Text.REGEX) { return (stringKey + ":/" + value + "/"); } else if (predicate == Text.CONTAINS_REGEX) { @@ -1027,7 +1034,8 @@ private String tokenize(ParameterType parameterType, KeyInformation.StoreRetriev if (analyzer != null) { terms = customTokenize(analyzer, key, (String) value); } else if (parameterType == ParameterType.TEXT_ANALYZER) { - terms = Text.tokenize((String) value); + //If a custom tokenizer was not specified, assume the standard one as defined in the default Solr Configset + terms = standardTokenizer((String) value); } else { return buildQueryFilterStringValue(key, (String) value, janusgraphPredicate, information); } @@ -1162,13 +1170,15 @@ public boolean supports(KeyInformation information, JanusGraphPredicate predicat case DEFAULT: case TEXT: return predicate == Text.CONTAINS || predicate == Text.CONTAINS_PREFIX - || predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY; + || predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY + || predicate == Text.CONTAINS_PHRASE; case STRING: return predicate instanceof Cmp || predicate==Text.REGEX || predicate==Text.PREFIX || predicate == Text.FUZZY; case TEXTSTRING: return predicate instanceof Cmp || predicate == Text.REGEX || predicate == Text.PREFIX || predicate == Text.FUZZY || predicate == Text.CONTAINS || predicate == Text.CONTAINS_PREFIX - || predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY; + || predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY + || predicate == Text.CONTAINS_PHRASE; } } else if (dataType == Date.class || dataType == Instant.class) { return predicate instanceof Cmp;
@@ -1267,6 +1277,30 @@ public boolean exists() throws BackendException {
     /*
     ################# UTILITY METHODS #######################
      */
+    /**
+     * Splits {@code text} into terms using Lucene's {@link StandardTokenizer}, which is used here
+     * to mimic the tokenizer of the default Solr configset when no custom analyzer is configured.
+     *
+     * @param text the text to tokenize
+     * @return the terms in the order the tokenizer emits them
+     * @throws UncheckedIOException if the tokenizer fails with an {@link IOException}
+     */
+    static List<String> standardTokenizer(String text) {
+        List<String> result = new ArrayList<>();
+        try (Tokenizer tokenizer = new StandardTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY)) {
+            tokenizer.setReader(new StringReader(text));
+            CharTermAttribute attr = tokenizer.addAttribute(CharTermAttribute.class);
+            tokenizer.reset();
+            while (tokenizer.incrementToken()) {
+                result.add(attr.toString());
+            }
+            // Complete the TokenStream consumer workflow (reset, incrementToken, end, close).
+            tokenizer.end();
+            return result;
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
 
     static Optional getDualFieldName(String fieldKey, KeyInformation ki) {
         if (AttributeUtils.isString(ki.getDataType()) && Mapping.getMapping(ki) == Mapping.TEXTSTRING) {
diff --git a/janusgraph-solr/src/test/java/org/janusgraph/diskstorage/solr/SolrIndexTest.java b/janusgraph-solr/src/test/java/org/janusgraph/diskstorage/solr/SolrIndexTest.java
index a023dc8330..de763583b8 100644
--- a/janusgraph-solr/src/test/java/org/janusgraph/diskstorage/solr/SolrIndexTest.java
+++ b/janusgraph-solr/src/test/java/org/janusgraph/diskstorage/solr/SolrIndexTest.java
@@ -98,6 +98,7 @@ public void testSupport() {
         assertTrue(index.supports(of(String.class, Cardinality.SINGLE, new Parameter("mapping", Mapping.TEXT)), Text.CONTAINS_FUZZY));
         assertTrue(index.supports(of(String.class, Cardinality.SINGLE, new Parameter("mapping", Mapping.TEXTSTRING)), Text.REGEX));
         assertTrue(index.supports(of(String.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.TEXT)), Text.CONTAINS));
+        assertTrue(index.supports(of(String.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.TEXT)), Text.CONTAINS_PHRASE));
         assertFalse(index.supports(of(String.class, Cardinality.SINGLE, new Parameter("mapping", Mapping.DEFAULT)), Text.PREFIX));
         assertTrue(index.supports(of(String.class, Cardinality.SINGLE, new Parameter("mapping", Mapping.STRING)), Text.PREFIX));
         assertTrue(index.supports(of(String.class, Cardinality.SINGLE, new Parameter("mapping",
Mapping.STRING)), Text.REGEX));
diff --git a/janusgraph-solr/src/test/java/org/janusgraph/diskstorage/solr/StandardTokenizerTest.java b/janusgraph-solr/src/test/java/org/janusgraph/diskstorage/solr/StandardTokenizerTest.java
new file mode 100644
index 0000000000..d0fdc2a0e5
--- /dev/null
+++ b/janusgraph-solr/src/test/java/org/janusgraph/diskstorage/solr/StandardTokenizerTest.java
@@ -0,0 +1,39 @@
+// Copyright 2023 JanusGraph Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.janusgraph.diskstorage.solr;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.janusgraph.diskstorage.solr.SolrIndex.standardTokenizer;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Unit test for {@link SolrIndex#standardTokenizer(String)}.
+ */
+public class StandardTokenizerTest {
+
+    /**
+     * Checks that punctuation, '@', '-' and ':' split the sample sentence into separate terms
+     * while dots between letters ("john.doe", "foo.com") stay inside a single term.
+     */
+    @Test
+    public void solrStandardTokenizerBehaviorTest() {
+        List<String> tokens = standardTokenizer("Please, email john.doe@foo.com by 03-09, re: m37-xq.");
+        assertEquals(Arrays.asList("Please", "email", "john.doe", "foo.com", "by", "03", "09", "re", "m37", "xq"), tokens);
+    }
+}