Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updates for solr based search #1091

Merged
merged 9 commits into from Dec 12, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions pom.xml
Expand Up @@ -46,6 +46,7 @@
<person.viewDates>false</person.viewDates>
<!-- Full Text Search With SOLR Settings -->
<solr.endpoint></solr.endpoint>
<solr.query.prefix>{!complexphrase inOrder=true}</solr.query.prefix>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updating the default query behavior to use the SOLR ComplexPhraseQueryParser: https://lucene.apache.org/solr/guide/6_6/other-parsers.html#OtherParsers-ComplexPhraseQueryParser. This will allow for searches that are closer in behavior to the SQL Wildcard search.

<solr.version>8.3.1</solr.version>
<!-- Heracles properties -->
<heracles.smallcellcount>5</heracles.smallcellcount>
Expand Down
35 changes: 35 additions & 0 deletions src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchClient.java
@@ -1,11 +1,15 @@
package org.ohdsi.webapi.vocabulary;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.response.CoreAdminResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.params.CoreAdminParams;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
Expand All @@ -15,6 +19,11 @@ public class SolrSearchClient {
@Value("${solr.endpoint}")
private String solrEndpoint;

@Value("${solr.query.prefix}")
private String solrQueryPrefix;

// Characters that escapeNonWildcardQuery prefixes with a backslash when it
// builds the non-wildcard query string.
// NOTE(review): the backslash itself is not in this list - confirm that is intentional.
public static final List<String> SOLR_ESCAPE_CHARACTERS = Arrays.asList("(", ")", "{", "}", "[", "]", "^", "\"", ":");

/**
 * Reports whether a Solr endpoint has been configured.
 *
 * @return {@code true} when {@code solrEndpoint} is neither null nor empty
 */
public boolean enabled() {
    final boolean endpointMissing = StringUtils.isEmpty(solrEndpoint);
    return !endpointMissing;
}
Expand All @@ -41,4 +50,30 @@ public HashSet<String> getCores() throws Exception {
return returnVal;
}

/**
 * Formats the search text using the default (wildcard) query form.
 *
 * @param query the raw search text
 * @return the result of the two-argument overload with wildcard search turned on
 */
public String formatSearchQuery(String query) {
    final Boolean wildcardByDefault = Boolean.TRUE;
    return formatSearchQuery(query, wildcardByDefault);
}

/**
 * Builds the query string sent to Solr for the given search text.
 *
 * <p>In wildcard mode the configured query prefix is prepended and the text,
 * escaped with {@code ClientUtils.escapeQueryChars}, is wrapped in a quoted
 * phrase with a leading and trailing {@code *}. Otherwise a plain
 * {@code query:} clause is produced from {@code escapeNonWildcardQuery}.
 *
 * @param query the raw search text
 * @param useWildcardSearch {@code true} for the wildcard form, {@code false}
 *     for the plain form; {@code null} is treated as {@code true}, which is
 *     the default used by the single-argument overload
 * @return the formatted query string
 */
public String formatSearchQuery(String query, Boolean useWildcardSearch) {
    // A null flag falls back to the wildcard default rather than failing
    // with a NullPointerException on auto-unboxing.
    if (useWildcardSearch == null || useWildcardSearch) {
        return solrQueryPrefix + "query:\"*" + ClientUtils.escapeQueryChars(query) + "*\"";
    }
    // The formatted query is deliberately not printed to stdout here.
    return "query:" + escapeNonWildcardQuery(query);
}

// Escaping used when building the non-wildcard query.
// ClientUtils.escapeQueryChars is not used here because it also adds
// an extra "\" in front of spaces, which can change the query results.
// Instead, only the characters listed in SOLR_ESCAPE_CHARACTERS are
// prefixed with a backslash.
public String escapeNonWildcardQuery(String query) {
    String escaped = query;
    for (int i = 0; i < SOLR_ESCAPE_CHARACTERS.size(); i++) {
        final String special = SOLR_ESCAPE_CHARACTERS.get(i);
        escaped = escaped.replace(special, "\\" + special);
    }
    return escaped;
}
}
33 changes: 29 additions & 4 deletions src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchProvider.java
Expand Up @@ -12,13 +12,18 @@
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

@Component
public class SolrSearchProvider implements SearchProvider {
protected final Logger log = LoggerFactory.getLogger(getClass());

@Autowired
SolrSearchClient solrSearchClient;

Expand All @@ -33,12 +38,32 @@ public Collection<Concept> executeSearch(SearchProviderConfig config, String que
SolrClient client = solrSearchClient.getSolrClient(config.getVersionKey());

SolrQuery q = new SolrQuery();
q.setQuery("query:" + query);
SolrDocumentList results = new SolrDocumentList();
QueryResponse response;
q.setStart(0);
q.setRows(Integer.parseInt(rows));

QueryResponse response = client.query(q);
SolrDocumentList results = response.getResults();
Boolean solrSearchError = false;
try {
q.setQuery(solrSearchClient.formatSearchQuery(query));
response = client.query(q);
results = response.getResults();
} catch (RemoteSolrException rse) {
// In this case, the default wildcard search did not work
// properly. Log this error and try an alternative approach.
log.error("SOLR Search Query: \"" + query + "\" failed with message: " + rse.getMessage());
solrSearchError = true;
}

// If we did not receive results from issuing the initial wildcard
// query OR there was an exception usually due to a maxBooleanClause
// violation from doing a wildcard search on a very common term, then
// we will make another attempt using the standard query approach
if (results.isEmpty() || solrSearchError) {
q.setQuery(solrSearchClient.formatSearchQuery(query, Boolean.FALSE));
response = client.query(q);
results = response.getResults();
}

for (int i = 0; i < results.size(); ++i) {
SolrDocument d = results.get(i);
Concept c = new Concept();
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/application.properties
Expand Up @@ -85,6 +85,7 @@ security.cas.cassvcs=${security.cas.cassvcs}
security.cas.casticket=${security.cas.casticket}
# Full Text Search settings
solr.endpoint = ${solr.endpoint}
solr.query.prefix = ${solr.query.prefix}
# Enabling Compression
compression=on
compressableMimeType=application/json,application/xml,text/html,text/xml,text/plain
Expand Down
34 changes: 1 addition & 33 deletions src/main/resources/solr/conf/lang/stopwords_en.txt
Expand Up @@ -19,36 +19,4 @@ stopworda
stopwordb

# Standard english stop words taken from Lucene's StopAnalyzer
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
such
that
the
their
then
there
these
they
this
to
was
will
with

2 changes: 2 additions & 0 deletions src/main/resources/solr/conf/managed-schema
Expand Up @@ -305,12 +305,14 @@
<filter class="solr.FlattenGraphFilterFactory"/>
-->
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>

Expand Down