OHDSI · anthonysena · Dec 12, 2019 · May 9, 2019 · Dec 4, 2019 · Dec 5, 2019
diff --git a/pom.xml b/pom.xml
@@ -46,6 +46,7 @@
     <person.viewDates>false</person.viewDates>
     <!-- Full Text Search With SOLR Settings -->
     <solr.endpoint></solr.endpoint>
+    <solr.query.prefix>{!complexphrase inOrder=true}</solr.query.prefix>
     <solr.version>8.3.1</solr.version>
     <!-- Heracles properties -->
     <heracles.smallcellcount>5</heracles.smallcellcount>

diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchClient.java b/src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchClient.java
@@ -1,11 +1,15 @@
 package org.ohdsi.webapi.vocabulary;
 
+import java.util.Arrays;
 import java.util.HashSet;
+import java.util.List;
+import java.util.regex.Pattern;
 import org.apache.commons.lang.StringUtils;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CoreAdminRequest;
 import org.apache.solr.client.solrj.response.CoreAdminResponse;
+import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.common.params.CoreAdminParams;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
@@ -15,6 +19,11 @@ public class SolrSearchClient {
     @Value("${solr.endpoint}")
     private String solrEndpoint;
 
+    // Prefix placed in front of the wildcard query built by formatSearchQuery();
+    // injected from the solr.query.prefix property.
+    @Value("${solr.query.prefix}")
+    private String solrQueryPrefix;
+
+    // Characters that escapeNonWildcardQuery() prefixes with a backslash.
+    public static final List<String> SOLR_ESCAPE_CHARACTERS = Arrays.asList("(", ")", "{", "}", "[", "]", "^", "\"", ":");
+
     public boolean enabled() {
         return !StringUtils.isEmpty(solrEndpoint);
     }
@@ -41,4 +50,30 @@ public HashSet<String> getCores() throws Exception {
         return returnVal;
     }
 
+    /**
+     * Builds the default SOLR query string for a search term by delegating to
+     * {@link #formatSearchQuery(String, Boolean)} with wildcard searching enabled.
+     *
+     * @param query the raw search term
+     * @return the formatted query string
+     */
+    public String formatSearchQuery(String query) {
+        final Boolean wildcardByDefault = Boolean.TRUE;
+        return formatSearchQuery(query, wildcardByDefault);
+    }
+
+    /**
+     * Builds the SOLR query string for a search term against the {@code query} field.
+     *
+     * <p>Wildcard form: the configured {@code solr.query.prefix} followed by
+     * {@code query:"*<escaped term>*"}, where the term is escaped with
+     * {@link ClientUtils#escapeQueryChars(String)}. Non-wildcard form:
+     * {@code query:<term>} with the term escaped by
+     * {@link #escapeNonWildcardQuery(String)}; no prefix is added.
+     *
+     * @param query the raw search term
+     * @param useWildcardSearch {@code true} for the wildcard form, {@code false}
+     *        for the plain form; {@code null} is treated as {@code true}, matching
+     *        the default used by {@link #formatSearchQuery(String)}
+     * @return the formatted query string
+     */
+    public String formatSearchQuery(String query, Boolean useWildcardSearch) {
+        // Compare against Boolean.FALSE instead of unboxing so that a null flag
+        // falls back to the wildcard default rather than throwing.
+        if (Boolean.FALSE.equals(useWildcardSearch)) {
+            return "query:" + escapeNonWildcardQuery(query);
+        }
+        return solrQueryPrefix + "query:\"*" + ClientUtils.escapeQueryChars(query) + "*\"";
+    }
+
+    /**
+     * Escapes a search term for the non-wildcard query.
+     *
+     * <p>{@link ClientUtils#escapeQueryChars(String)} is not used here because it
+     * also puts a backslash in front of spaces, which can change the query
+     * results. Instead only the backslash and the characters listed in
+     * {@link #SOLR_ESCAPE_CHARACTERS} are escaped.
+     *
+     * @param query the raw search term
+     * @return the term with each escaped character preceded by a backslash
+     */
+    public String escapeNonWildcardQuery(String query) {
+        // Backslashes already in the input must be doubled before any escape is
+        // added; otherwise an input such as \" would become \\" and leave the
+        // quote unescaped.
+        String escaped = query.replace("\\", "\\\\");
+        for (String item : SOLR_ESCAPE_CHARACTERS) {
+            escaped = escaped.replace(item, "\\" + item);
+        }
+        return escaped;
+    }
 }
diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchProvider.java b/src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchProvider.java
@@ -12,13 +12,18 @@
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
 
 @Component
 public class SolrSearchProvider implements SearchProvider {
+    protected final Logger log = LoggerFactory.getLogger(getClass());
+
     @Autowired
     SolrSearchClient solrSearchClient;
 
@@ -33,12 +38,32 @@ public Collection<Concept> executeSearch(SearchProviderConfig config, String que
         SolrClient client = solrSearchClient.getSolrClient(config.getVersionKey());
 
         SolrQuery q = new SolrQuery();
-        q.setQuery("query:" + query);
+        SolrDocumentList results = new SolrDocumentList();
+        QueryResponse response;
         q.setStart(0);
         q.setRows(Integer.parseInt(rows));
-
-        QueryResponse response = client.query(q);
-        SolrDocumentList results = response.getResults();
+        Boolean solrSearchError = false;
+        try {
+            q.setQuery(solrSearchClient.formatSearchQuery(query));
+            response = client.query(q);
+            results = response.getResults();
+        } catch (RemoteSolrException rse) {
+            // In this case, the default wildcard search did not work
+            // properly. Log this error and try an alternative approach.
+            log.error("SOLR Search Query: \"" + query + "\" failed with message: " + rse.getMessage());
+            solrSearchError = true;
+        }
+
+        // If the initial wildcard query returned no results, OR it threw an
+        // exception (usually a maxBooleanClauses violation caused by a wildcard
+        // search on a very common term), make another attempt using the
+        // standard, non-wildcard query.
+        if (results.isEmpty() || solrSearchError) {
+            q.setQuery(solrSearchClient.formatSearchQuery(query, Boolean.FALSE));
+            response = client.query(q);
+            results = response.getResults();
+        }
+
         for (int i = 0; i < results.size(); ++i) {
             SolrDocument d = results.get(i);
             Concept c = new Concept();

diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
@@ -85,6 +85,7 @@ security.cas.cassvcs=${security.cas.cassvcs}
 security.cas.casticket=${security.cas.casticket}
 # Full Text Search settings
 solr.endpoint = ${solr.endpoint}
+solr.query.prefix = ${solr.query.prefix}
 # Enabling Compression
 compression=on
 compressableMimeType=application/json,application/xml,text/html,text/xml,text/plain

diff --git a/src/main/resources/solr/conf/lang/stopwords_en.txt b/src/main/resources/solr/conf/lang/stopwords_en.txt
@@ -19,36 +19,4 @@ stopworda
 stopwordb
 
 # Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
+
diff --git a/src/main/resources/solr/conf/managed-schema b/src/main/resources/solr/conf/managed-schema
@@ -305,12 +305,14 @@
         <filter class="solr.FlattenGraphFilterFactory"/>
         -->
         <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
         <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
         <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
       </analyzer>
     </fieldType>