-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Lucene Query can be converted to Bson
- Loading branch information
QuadStingray
committed
Mar 29, 2023
1 parent
753a633
commit e56e5d4
Showing
5 changed files
with
341 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 4 additions & 0 deletions
4
src/main/scala/dev/mongocamp/driver/mongodb/exception/NotSupportedException.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
package dev.mongocamp.driver.mongodb.exception | ||
|
||
/** Exception raised when a requested construct is not supported.
  *
  * @param message
  *   description of the unsupported construct, passed on to [[java.lang.Exception]]
  */
class NotSupportedException(message: String) extends Exception(message)
|
194 changes: 194 additions & 0 deletions
194
src/main/scala/dev/mongocamp/driver/mongodb/lucene/LuceneQueryConverter.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
package dev.mongocamp.driver.mongodb.lucene | ||
|
||
import com.typesafe.scalalogging.LazyLogging | ||
import dev.mongocamp.driver.mongodb._ | ||
import dev.mongocamp.driver.mongodb.exception.NotSupportedException | ||
import org.apache.lucene.queryparser.classic.QueryParser | ||
import org.apache.lucene.search.BooleanClause.Occur | ||
import org.apache.lucene.search._ | ||
import org.mongodb.scala.bson.conversions.Bson | ||
|
||
import java.text.SimpleDateFormat | ||
import java.util.Date | ||
import scala.collection.mutable | ||
import scala.collection.mutable.ArrayBuffer | ||
import scala.jdk.CollectionConverters._ | ||
|
||
object LuceneQueryConverter extends LazyLogging { | ||
|
||
/** Converts a Lucene query into a MongoDB filter.
  *
  * The top-level query is converted as "not negated". The resulting `Map[String, Any]` is turned into `Bson` by an implicit
  * conversion that is in scope (presumably from the `dev.mongocamp.driver.mongodb` package import - confirm).
  *
  * @param query
  *   the Lucene query to convert
  * @return
  *   the MongoDB filter for `query`
  */
def getMongoDbSearch(query: Query): Bson = getMongoDbSearchMap(query, negated = false)
|
||
/** Parses a Lucene query string with the classic `QueryParser` and a [[MongoCampLuceneAnalyzer]].
  *
  * Leading wildcards (e.g. `*foo`) are allowed. The analyzer is closed in all cases, including when parsing fails; any exception
  * thrown by `QueryParser.parse` is propagated to the caller.
  *
  * @param queryString
  *   the Lucene query string to parse
  * @param defaultField
  *   field used for terms that do not name a field explicitly
  * @return
  *   the parsed Lucene query
  */
def parse(queryString: String, defaultField: String): Query = {
  val analyzer = new MongoCampLuceneAnalyzer()
  try {
    val queryParser = new QueryParser(defaultField, analyzer)
    queryParser.setAllowLeadingWildcard(true)
    queryParser.parse(queryString)
  }
  finally {
    // Release the analyzer even when the query string is invalid and parse throws.
    analyzer.close()
  }
}
|
||
/** Builds the MongoDB filter map for a single Lucene query by dispatching on its concrete type.
  *
  * Query types without a dedicated handler are only logged as an error; in that case the returned map is empty.
  *
  * @param query
  *   the Lucene query to convert
  * @param negated
  *   whether the query is prohibited (passed on to every handler except the one for boolean queries)
  * @return
  *   an immutable snapshot of the filter entries produced by the matching handler
  */
private def getMongoDbSearchMap(query: Query, negated: Boolean): Map[String, Any] = {
  val collected = mutable.Map.empty[String, Any]
  query match {
    case boolean: BooleanQuery    => appendBooleanQueryToSearchMap(collected, boolean)
    case range: TermRangeQuery    => appendTermRangeQueryToSearchMap(negated, collected, range)
    case term: TermQuery          => appendTermQueryToSearchMap(negated, collected, term)
    case prefix: PrefixQuery      => appendPrefixQueryToSearchMap(negated, collected, prefix)
    case wildcard: WildcardQuery  => appendWildCardQueryToSearchMap(negated, collected, wildcard)
    case phrase: PhraseQuery      => appendPhraseQueryToSearchMap(negated, collected, phrase)
    case unsupported: Any         => logger.error(s"Unexpected QueryType <${unsupported.getClass.getSimpleName}>")
  }
  collected.toMap
}
/** Converts the clauses of a boolean query and stores them under `$and` and/or `$or`.
  *
  * A clause is "AND-like" when its occur is `MUST` or `MUST_NOT`, and "OR-like" when it is `SHOULD`. A clause is added to the
  * `$and` list only if both it and the clause before it are AND-like (the first clause is treated as following an AND-like
  * clause); every other clause is added to the `$or` list. Empty lists are not written to the map.
  *
  * @param searchMapResponse
  *   the filter map that receives the `$and` / `$or` entries
  * @param booleanQuery
  *   the boolean query whose clauses are converted
  * @throws NotSupportedException
  *   if a clause has any other occur value
  */
private def appendBooleanQueryToSearchMap(searchMapResponse: mutable.Map[String, Any], booleanQuery: BooleanQuery): Unit = {
  val andClauses     = ArrayBuffer.empty[Map[String, Any]]
  val orClauses      = ArrayBuffer.empty[Map[String, Any]]
  var previousWasAnd = true

  for (clause <- booleanQuery.clauses().asScala) {
    // The sub query is converted before the occur value is inspected.
    val clauseFilter = getMongoDbSearchMap(clause.getQuery, clause.isProhibited)
    val currentIsAnd = clause.getOccur match {
      case Occur.MUST | Occur.MUST_NOT => true
      case Occur.SHOULD                => false
      case unsupported =>
        logger.error(s"Unexpected Occur <${unsupported.name()}>")
        throw new NotSupportedException(s"${unsupported.name()} currently not supported")
    }

    if (previousWasAnd && currentIsAnd) {
      andClauses += clauseFilter
    }
    else {
      orClauses += clauseFilter
    }
    previousWasAnd = currentIsAnd
  }

  if (andClauses.nonEmpty) {
    searchMapResponse.put("$and", andClauses.toList)
  }
  if (orClauses.nonEmpty) {
    searchMapResponse.put("$or", orClauses.toList)
  }
}
/** Converts a Lucene term range query into MongoDB comparison operators on the query's field.
  *
  *   - A bound that is absent (open-ended range, `null` term) is left out instead of being dereferenced.
  *   - Inclusive bounds map to `$gte` / `$lte`, exclusive bounds to `$gt` / `$lt`.
  *   - A range without any bound is expressed as `$exists: true`.
  *   - When `negated` is set the whole expression is wrapped in `$not`.
  *
  * @param negated
  *   whether the range is prohibited
  * @param searchMapResponse
  *   the filter map that receives the entry for the field
  * @param termRangeQuery
  *   the range query to convert
  */
private def appendTermRangeQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], termRangeQuery: TermRangeQuery): Unit = {
  // utf8ToString honours the offset/length of the BytesRef and always decodes UTF-8, unlike new String(ref.bytes).
  val lowerBound = Option(termRangeQuery.getLowerTerm).map(term => checkAndConvertValue(term.utf8ToString()))
  val upperBound = Option(termRangeQuery.getUpperTerm).map(term => checkAndConvertValue(term.utf8ToString()))

  val lowerOperator = if (termRangeQuery.includesLower()) "$gte" else "$gt"
  val upperOperator = if (termRangeQuery.includesUpper()) "$lte" else "$lt"

  val bounds: Map[String, Any] = List(
    upperBound.map(value => upperOperator -> value),
    lowerBound.map(value => lowerOperator -> value)
  ).flatten.toMap

  // A range that is open on both sides only requires the field to be present.
  val inRangeSearch: Map[String, Any] = if (bounds.isEmpty) Map("$exists" -> true) else bounds

  if (negated) {
    searchMapResponse.put(termRangeQuery.getField, Map("$not" -> inRangeSearch))
  }
  else {
    searchMapResponse.put(termRangeQuery.getField, inRangeSearch)
  }
}
/** Converts a Lucene term query into an equality filter on the term's field.
  *
  * The term text is passed through `checkAndConvertValue` and compared with `$eq`, or with `$ne` when `negated` is set.
  *
  * @param negated
  *   whether the term is prohibited
  * @param searchMapResponse
  *   the filter map that receives the entry for the field
  * @param termQuery
  *   the term query to convert
  */
private def appendTermQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], termQuery: TermQuery): Unit = {
  val term     = termQuery.getTerm
  val operator = if (negated) "$ne" else "$eq"
  searchMapResponse.put(term.field(), Map(operator -> checkAndConvertValue(term.text())))
}
/** Converts a Lucene prefix query into a case-insensitive regex filter.
  *
  * The prefix is used as literal text: regex metacharacters are escaped, and the text is no longer run through
  * `checkAndConvertValue` (a date-like prefix would otherwise be replaced by the `toString` of the parsed date). The pattern is
  * not anchored, so the prefix may start anywhere inside the field value. The filter is stored under `$and`, or under `$nor`
  * when `negated` is set.
  *
  * @param negated
  *   whether the prefix query is prohibited
  * @param searchMapResponse
  *   the filter map that receives the `$and` / `$nor` entry
  * @param query
  *   the prefix query to convert
  */
private def appendPrefixQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], query: PrefixQuery): Unit = {
  // Escapes every character that has a special meaning in a regular expression.
  def escapeRegex(literal: String): String =
    literal.flatMap(c => if ("\\.[]{}()*+?^$|".indexOf(c) >= 0) "\\" + c else c.toString)

  val searchValue                = escapeRegex(query.getPrefix.text()) + "(.*?)"
  val listOfSearches: List[Bson] = List(Map(query.getField -> generateRegexQuery(searchValue, "i")))
  if (negated) {
    searchMapResponse.put("$nor", listOfSearches)
  }
  else {
    searchMapResponse ++= Map("$and" -> listOfSearches)
  }
}
/** Converts a Lucene wildcard query into a case-insensitive regex filter on the query's field.
  *
  * Translation of the wildcard term:
  *   - `*` becomes `(.*?)` (any sequence of characters),
  *   - `?` becomes `.` (exactly one character),
  *   - `\x` is the literal character `x` (Lucene's wildcard escape),
  *   - every other character is matched literally, with regex metacharacters escaped.
  *
  * The term is not run through `checkAndConvertValue`, because a wildcard pattern is always text. When `negated` is set the
  * regex is wrapped in `$not`.
  *
  * @param negated
  *   whether the wildcard query is prohibited
  * @param searchMapResponse
  *   the filter map that receives the entry for the field
  * @param query
  *   the wildcard query to convert
  */
private def appendWildCardQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], query: WildcardQuery): Unit = {
  val regexMetaCharacters = "\\.[]{}()*+?^$|"

  // Renders a single character so that the regex matches it literally.
  def literal(c: Char): String = if (regexMetaCharacters.indexOf(c) >= 0) "\\" + c else c.toString

  // State: (regex built so far, whether the previous character was an unconsumed escape).
  val (translated, danglingEscape) = query.getTerm.text().foldLeft(("", false)) {
    case ((regex, true), c)     => (regex + literal(c), false)
    case ((regex, false), '\\') => (regex, true)
    case ((regex, false), '*')  => (regex + "(.*?)", false)
    case ((regex, false), '?')  => (regex + ".", false)
    case ((regex, false), c)    => (regex + literal(c), false)
  }
  // A trailing backslash has nothing to escape and is matched as a literal backslash.
  val searchValue = if (danglingEscape) translated + "\\\\" else translated

  val regexQuery = generateRegexQuery(searchValue, "i")
  if (negated) {
    searchMapResponse.put(query.getField, Map("$not" -> regexQuery))
  }
  else {
    searchMapResponse.put(query.getField, regexQuery)
  }
}
/** Converts a Lucene phrase query into one case-insensitive "contains" regex filter per phrase term.
  *
  * Each term (after `checkAndConvertValue`) is wrapped in `(.*?)` on both sides and applied to the term's own field. The list of
  * filters is stored under `$and`, or under `$nor` when `negated` is set. Term positions are not taken into account.
  *
  * @param negated
  *   whether the phrase query is prohibited
  * @param searchMapResponse
  *   the filter map that receives the `$and` / `$nor` entry
  * @param query
  *   the phrase query to convert
  */
private def appendPhraseQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], query: PhraseQuery): Unit = {
  val termFilters = query.getTerms.toList.map { term =>
    val containsTerm = s"(.*?)${checkAndConvertValue(term.text())}(.*?)"
    Map(term.field() -> generateRegexQuery(containsTerm, "i"))
  }
  val operator = if (negated) "$nor" else "$and"
  searchMapResponse.put(operator, termFilters)
}
/** Builds a regex operator map with the keys `$regex` and `$options`.
  *
  * @param pattern
  *   the regular expression, stored under `$regex`
  * @param options
  *   the regex options, stored under `$options`
  * @return
  *   a two-entry map holding both values
  */
private def generateRegexQuery(pattern: String, options: String): Map[String, String] =
  Map("$regex" -> pattern, "$options" -> options)
/** Tries to interpret a textual value as a typed value.
  *
  * The conversions are attempted in this order, and the first one that applies wins:
  *   1. `Double`, `Long`, `Boolean` - only when the parsed value prints back to exactly the input text,
  *   1. `java.util.Date` - using the first entry of `datePatterns` that `SimpleDateFormat` can parse the text with,
  *   1. otherwise the input text itself.
  *
  * Any exception raised during conversion results in the unchanged input being returned.
  *
  * NOTE(review): `SimpleDateFormat.parse(String)` does not have to consume the whole text, so a value that merely starts with a
  * date may be converted as well - confirm this is intended.
  *
  * @param s
  *   the textual value taken from the Lucene query
  * @return
  *   the converted value, or `s` when no conversion applies
  */
private def checkAndConvertValue(s: String): Serializable = {
  // Parses `s` with one pattern; a pattern that does not fit yields None.
  def parseDate(pattern: String): Option[Date] =
    try Option(new SimpleDateFormat(pattern).parse(s))
    catch {
      case _: Exception => None
    }

  try {
    if (s.toDoubleOption.exists(_.toString == s)) {
      s.toDouble
    }
    else if (s.toLongOption.exists(_.toString == s)) {
      s.toLong
    }
    else if (s.toBooleanOption.exists(_.toString == s)) {
      s.toBoolean
    }
    else {
      datePatterns.iterator.map(parseDate).collectFirst { case Some(date) => date }.getOrElse(s)
    }
  }
  catch {
    case _: Exception => s
  }
}

/** Date patterns tried by `checkAndConvertValue`, in order: compact and dashed forms, each with and without a zone offset. */
private lazy val datePatterns = List(
  "yyyyMMdd'T'HHmmssSSSZZ",
  "yyyyMMdd'T'HHmmssZZ",
  "yyyyMMdd'T'HHmmZZ",
  "yyyyMMdd'T'HHmmssSSS",
  "yyyyMMdd'T'HHmmss",
  "yyyyMMdd'T'HHmm",
  "yyyy-MM-dd'T'HH:mm:ss.SSSZZ",
  "yyyy-MM-dd'T'HH:mm:ssZZ",
  "yyyy-MM-dd'T'HH:mmZZ",
  "yyyy-MM-dd'T'HH:mm:ss.SSS",
  "yyyy-MM-dd'T'HH:mm:ss",
  "yyyy-MM-dd'T'HH:mm"
)
} |
30 changes: 30 additions & 0 deletions
30
src/main/scala/dev/mongocamp/driver/mongodb/lucene/MongoCampLuceneAnalyzer.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package dev.mongocamp.driver.mongodb.lucene | ||
import org.apache.lucene.analysis._ | ||
import org.apache.lucene.analysis.standard.StandardTokenizer | ||
|
||
import java.io.Reader | ||
|
||
/** Lucene analyzer made of a `StandardTokenizer` followed by a `StopFilter`.
  *
  * Tokens are not normalized (see `normalize`), so terms keep the text they were written with.
  *
  * @param stopWords
  *   words removed from the token stream; handed to `StopwordAnalyzerBase` so that the inherited `stopwords` set used by the
  *   `StopFilter` actually contains them
  * @param maxTokenLength
  *   maximum token length configured on the tokenizer
  */
class MongoCampLuceneAnalyzer(stopWords: CharArraySet = CharArraySet.EMPTY_SET, maxTokenLength: Int = MongoCampLuceneAnalyzer.defaultMaxTokenLength)
    extends StopwordAnalyzerBase(stopWords) {

  /** Creates the tokenizer / stop-filter chain used for a field. */
  override protected def createComponents(fieldName: String): Analyzer.TokenStreamComponents = {
    val src = new StandardTokenizer
    src.setMaxTokenLength(maxTokenLength)
    // `stopwords` is the set held by StopwordAnalyzerBase, initialised from the `stopWords` constructor argument.
    val tok: TokenStream = new StopFilter(src, stopwords)
    new Analyzer.TokenStreamComponents(
      (r: Reader) => {
        src.setMaxTokenLength(maxTokenLength)
        src.setReader(r)
      },
      tok
    )
  }

  /** Returns the token stream unchanged, i.e. no normalization is applied. */
  override protected def normalize(fieldName: String, in: TokenStream): TokenStream = in

}

object MongoCampLuceneAnalyzer {

  /** Token length limit used when the constructor is called without `maxTokenLength`. */
  private val defaultMaxTokenLength: Int = 255
}
111 changes: 111 additions & 0 deletions
111
src/test/scala/dev/mongocamp/driver/mongodb/lucene/LuceneSearchSpec.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
package dev.mongocamp.driver.mongodb.lucene | ||
|
||
import dev.mongocamp.driver.mongodb._ | ||
import dev.mongocamp.driver.mongodb.dao.PersonSpecification | ||
import dev.mongocamp.driver.mongodb.test.TestDatabase._ | ||
|
||
/** Checks that Lucene query strings converted by [[LuceneQueryConverter]] select the expected persons. */
class LuceneSearchSpec extends PersonSpecification {
  lazy val sortByBalance = Map("balance" -> -1)

  /** Parses `queryString`, converts it to a MongoDB filter and returns the matching persons, sorted with `sortByBalance`. */
  private def searchPersons(queryString: String, defaultField: String) = {
    val luceneQuery = LuceneQueryConverter.parse(queryString, defaultField)
    PersonDAO.find(LuceneQueryConverter.getMongoDbSearch(luceneQuery), sortByBalance).resultList()
  }

  "LuceneSearch" should {

    "search with extended query" in {
      val search = searchPersons("(favoriteFruit:\"apple\" AND age:\"25\") OR name:*Cecile* AND -active:false AND 123", "id")
      search must haveSize(1)
      search.head.age mustEqual 25
      search.head.name mustEqual "Terra Salinas"
    }

    "between filter for number value" in {
      val search = searchPersons("[1010 TO 1052.3]", "balance")
      search must haveSize(3)
      search.head.age mustEqual 28
      search.head.name mustEqual "Mason Donaldson"
      search.last.name mustEqual "Nash Dunn"
    }

    "between filter for number value not" in {
      val search = searchPersons("-[1010 TO 1052.3]", "balance")
      search must haveSize(197)
      search.head.age mustEqual 29
      search.head.balance mustEqual 3996.0
      search.head.name mustEqual "Diaz Jacobs"
    }

    "between filter for date value" in {
      val search = searchPersons("[2014-04-20T00:00:00Z TO 2014-04-22T23:59:59Z]", "registered")
      search must haveSize(10)
      search.head.age mustEqual 25
      search.head.name mustEqual "Allison Turner"
      search.head.balance mustEqual 3961.0
    }

    "equals Query with Date" in {
      val search = searchPersons("registered:20140420T004427000+0200", "unbekannt")
      search must haveSize(1)
      search.head.age mustEqual 31
      search.head.name mustEqual "Latasha Mcmillan"
      search.head.balance mustEqual 3403.0
    }

    "wildcard at the end" in {
      val search = searchPersons("Latasha*", "name")
      search must haveSize(1)
      search.head.age mustEqual 31
      search.head.name mustEqual "Latasha Mcmillan"
      search.head.balance mustEqual 3403.0
    }

    "wildcard at the start" in {
      val search = searchPersons("*Mcmillan", "name")
      search must haveSize(1)
      search.head.age mustEqual 31
      search.head.name mustEqual "Latasha Mcmillan"
      search.head.balance mustEqual 3403.0
    }

    "not wildcard at the start" in {
      val search = searchPersons("-name:*Mcmillan", "ube")
      search must haveSize(199)
    }

    "wildcard at the start and end" in {
      val search = searchPersons("*Mcmil*", "name")
      search must haveSize(1)
      search.head.age mustEqual 31
      search.head.name mustEqual "Latasha Mcmillan"
      search.head.balance mustEqual 3403.0
    }

    "not wildcard at the start and end" in {
      val search = searchPersons("-name:*Mcmil*", "ube")
      search must haveSize(199)
    }

    "wildcard in the middle" in {
      val search = searchPersons("\"Latasha *millan\"", "name")
      search must haveSize(1)
      search.head.age mustEqual 31
      search.head.name mustEqual "Latasha Mcmillan"
      search.head.balance mustEqual 3403.0
    }

    "not wildcard in the middle" in {
      val search = searchPersons("-name:\"Latasha*millan\"", "ube")
      search must haveSize(199)
    }
  }

}