Skip to content

Commit

Permalink
feat: Lucene Query can be converted to Bson
Browse files Browse the repository at this point in the history
  • Loading branch information
QuadStingray committed Mar 29, 2023
1 parent 753a633 commit e56e5d4
Show file tree
Hide file tree
Showing 5 changed files with 341 additions and 0 deletions.
2 changes: 2 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ libraryDependencies += "org.xerial.snappy" % "snappy-java" % "1.1.9.1" % Provide

libraryDependencies += "com.github.luben" % "zstd-jni" % "1.5.4-2" % Provided

// Lucene's classic query parser, imported by LuceneQueryConverter. It is declared as Provided like
// the neighbouring dependencies, so presumably applications that use the Lucene conversion have to
// add it to their own build — confirm against the project documentation.
libraryDependencies += "org.apache.lucene" % "lucene-queryparser" % "9.5.0" % Provided

// Version string used by the mongo-java-server dependency declared right below.
val MongoJavaServerVersion = "1.43.0"

libraryDependencies += "de.bwaldvogel" % "mongo-java-server" % MongoJavaServerVersion % Provided
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package dev.mongocamp.driver.mongodb.exception

/**
 * Exception carrying a plain text message; in this code base it is thrown by the Lucene query
 * converter when it meets a boolean clause type it cannot translate.
 *
 * @param message description of the unsupported feature
 */
class NotSupportedException(message: String) extends Exception(message)

Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package dev.mongocamp.driver.mongodb.lucene

import com.typesafe.scalalogging.LazyLogging
import dev.mongocamp.driver.mongodb._
import dev.mongocamp.driver.mongodb.exception.NotSupportedException
import org.apache.lucene.queryparser.classic.QueryParser
import org.apache.lucene.search.BooleanClause.Occur
import org.apache.lucene.search._
import org.mongodb.scala.bson.conversions.Bson

import java.text.SimpleDateFormat
import java.util.Date
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.jdk.CollectionConverters._

object LuceneQueryConverter extends LazyLogging {

/**
 * Converts a parsed Lucene query into a MongoDB filter.
 *
 * The translation is delegated to `getMongoDbSearchMap`, starting in the non-negated state. The
 * resulting map is returned as `Bson` (presumably through an implicit conversion brought in by the
 * driver package import — confirm).
 *
 * @param query the Lucene query to translate, e.g. the result of `parse`
 * @return the filter built from the query
 */
def getMongoDbSearch(query: Query): Bson =
  getMongoDbSearchMap(query, negated = false)

/**
 * Parses a Lucene query string with the classic `QueryParser`.
 *
 * Leading wildcards (e.g. `*foo`) are explicitly allowed. The analyzer created for the parser is
 * closed in a `finally` block, so it is released even when the query string is rejected by the
 * parser.
 *
 * @param queryString  the Lucene query expression
 * @param defaultField field used for terms that do not name a field themselves
 * @return the parsed query
 */
def parse(queryString: String, defaultField: String): Query = {
  val analyzer = new MongoCampLuceneAnalyzer()
  try {
    val queryParser = new QueryParser(defaultField, analyzer)
    queryParser.setAllowLeadingWildcard(true)
    queryParser.parse(queryString)
  }
  finally {
    analyzer.close()
  }
}

/**
 * Translates one Lucene query node into a MongoDB filter map by dispatching on the query type.
 *
 * Query types without a dedicated case are logged as an error and contribute nothing to the
 * returned map.
 *
 * @param query   the Lucene query node to translate
 * @param negated whether the node is prohibited (`NOT` / `-`) in its parent clause
 * @return an immutable filter map for this node; empty for unsupported query types
 */
private def getMongoDbSearchMap(query: Query, negated: Boolean): Map[String, Any] = {
  val searchMapResponse = mutable.Map[String, Any]()
  query match {
    case booleanQuery: BooleanQuery if negated =>
      // A prohibited group such as `-(a AND b)` must not lose its negation: build the positive
      // filter for the group first and exclude everything that matches it.
      val positiveMatch = mutable.Map[String, Any]()
      appendBooleanQueryToSearchMap(positiveMatch, booleanQuery)
      if (positiveMatch.nonEmpty) {
        searchMapResponse.put("$nor", List(positiveMatch.toMap))
      }
    case booleanQuery: BooleanQuery =>
      appendBooleanQueryToSearchMap(searchMapResponse, booleanQuery)
    case termRangeQuery: TermRangeQuery =>
      appendTermRangeQueryToSearchMap(negated, searchMapResponse, termRangeQuery)
    case termQuery: TermQuery =>
      appendTermQueryToSearchMap(negated, searchMapResponse, termQuery)
    case prefixQuery: PrefixQuery =>
      appendPrefixQueryToSearchMap(negated, searchMapResponse, prefixQuery)
    case wildcardQuery: WildcardQuery =>
      appendWildCardQueryToSearchMap(negated, searchMapResponse, wildcardQuery)
    case phraseQuery: PhraseQuery =>
      appendPhraseQueryToSearchMap(negated, searchMapResponse, phraseQuery)
    case a: Any =>
      logger.error(s"Unexpected QueryType <${a.getClass.getSimpleName}>")
  }
  searchMapResponse.toMap
}
/**
 * Translates the clauses of a boolean query and stores them under `$and` / `$or`.
 *
 * Every clause is converted first (prohibited clauses are converted in negated form). A clause
 * lands in the `$and` list only when both it and the clause before it count as "and" clauses
 * (`MUST` or `MUST_NOT`; the first clause is treated as if preceded by an "and" clause). All other
 * clauses land in the `$or` list. Empty lists are not written.
 *
 * @param searchMapResponse filter map that receives the `$and` / `$or` entries
 * @param booleanQuery      the boolean query whose clauses are translated
 * @throws NotSupportedException for a clause whose occur type is neither MUST, SHOULD nor MUST_NOT
 */
private def appendBooleanQueryToSearchMap(searchMapResponse: mutable.Map[String, Any], booleanQuery: BooleanQuery): Unit = {
  val andClauses = ArrayBuffer[Map[String, Any]]()
  val orClauses  = ArrayBuffer[Map[String, Any]]()
  // Kind of the previously visited clause; the grouping decision depends on it.
  var previousWasAnd = true

  for (clause <- booleanQuery.clauses().asScala) {
    // The clause is converted before its occur type is inspected, exactly as the checks below expect.
    val clauseFilter = getMongoDbSearchMap(clause.getQuery, clause.isProhibited)

    val currentIsAnd = clause.getOccur match {
      case Occur.MUST | Occur.MUST_NOT => true
      case Occur.SHOULD                => false
      case unsupported =>
        logger.error(s"Unexpected Occur <${unsupported.name()}>")
        throw new NotSupportedException(s"${unsupported.name()} currently not supported")
    }

    if (previousWasAnd && currentIsAnd) {
      andClauses += clauseFilter
    }
    else {
      orClauses += clauseFilter
    }
    previousWasAnd = currentIsAnd
  }

  if (andClauses.nonEmpty) {
    searchMapResponse.put("$and", andClauses.toList)
  }
  if (orClauses.nonEmpty) {
    searchMapResponse.put("$or", orClauses.toList)
  }
}
/**
 * Translates a Lucene range query (`[a TO b]`, `{a TO b}`, `[* TO b]`, ...) into comparison
 * operators on the queried field.
 *
 *  - Inclusive bounds become `$gte` / `$lte`, exclusive bounds become `$gt` / `$lt`.
 *  - An open bound (Lucene reports it as a `null` term) is simply left out instead of failing.
 *  - A range that is open on both sides is expressed as `$exists: true`.
 *  - Bound terms are decoded with `BytesRef.utf8ToString`, which honours the term's offset and
 *    length, and are then converted with `checkAndConvertValue`.
 *
 * @param negated           whether the range is prohibited; the condition is then wrapped in `$not`
 * @param searchMapResponse filter map that receives the entry for the queried field
 * @param termRangeQuery    the range query to translate
 */
private def appendTermRangeQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], termRangeQuery: TermRangeQuery): Unit = {
  val lowerBound = Option(termRangeQuery.getLowerTerm).map(term => checkAndConvertValue(term.utf8ToString()))
  val upperBound = Option(termRangeQuery.getUpperTerm).map(term => checkAndConvertValue(term.utf8ToString()))

  val upperOperator = if (termRangeQuery.includesUpper()) "$lte" else "$lt"
  val lowerOperator = if (termRangeQuery.includesLower()) "$gte" else "$gt"

  val bounds: Map[String, Any] = List(
    upperBound.map(value => upperOperator -> value),
    lowerBound.map(value => lowerOperator -> value)
  ).flatten.toMap

  // `[* TO *]` carries no bounds at all; the closest MongoDB equivalent is a presence check.
  val inRangeSearch: Map[String, Any] = if (bounds.isEmpty) Map("$exists" -> true) else bounds

  if (negated) {
    searchMapResponse.put(termRangeQuery.getField, Map("$not" -> inRangeSearch))
  }
  else {
    searchMapResponse.put(termRangeQuery.getField, inRangeSearch)
  }
}
/**
 * Translates a single-term query into an equality condition on the term's field.
 *
 * The term text is converted with `checkAndConvertValue`; the condition uses `$ne` for a negated
 * term and `$eq` otherwise.
 *
 * @param negated           whether the term is prohibited
 * @param searchMapResponse filter map that receives the entry for the term's field
 * @param termQuery         the term query to translate
 */
private def appendTermQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], termQuery: TermQuery): Unit = {
  val term     = termQuery.getTerm
  val operator = if (negated) "$ne" else "$eq"
  searchMapResponse.put(term.field(), Map(operator -> checkAndConvertValue(term.text())))
}
/**
 * Translates a prefix query (`foo*`) into a case-insensitive regular expression condition.
 *
 * The prefix is used as literal text: it is quoted with `Pattern.quote`, so characters such as
 * `.` or `+` keep their literal meaning, and it is not run through the value conversion, so a
 * date-like prefix stays the text the user typed. The expression is not anchored, so the prefix
 * may start anywhere inside the field value.
 *
 * @param negated           whether the prefix is prohibited; the condition is then stored under `$nor`
 * @param searchMapResponse filter map that receives the `$and` / `$nor` entry
 * @param query             the prefix query to translate
 */
private def appendPrefixQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], query: PrefixQuery): Unit = {
  val searchValue                = s"${java.util.regex.Pattern.quote(query.getPrefix.text())}(.*?)"
  val listOfSearches: List[Bson] = List(Map(query.getField -> generateRegexQuery(searchValue, "i")))
  if (negated) {
    searchMapResponse.put("$nor", listOfSearches)
  }
  else {
    searchMapResponse.put("$and", listOfSearches)
  }
}
/**
 * Translates a wildcard query into a case-insensitive regular expression condition on its field.
 *
 * The wildcard text is split into tokens and rebuilt as a regular expression:
 *  - `*` becomes `(.*?)` (any sequence of characters),
 *  - `?` becomes `.` (exactly one character),
 *  - a backslash-escaped character is taken literally,
 *  - every other run of characters is quoted with `Pattern.quote`, so regex metacharacters in the
 *    search text keep their literal meaning.
 *
 * The text is not run through the value conversion, so date-like input stays the typed text.
 *
 * @param negated           whether the wildcard is prohibited; the expression is then wrapped in `$not`
 * @param searchMapResponse filter map that receives the entry for the queried field
 * @param query             the wildcard query to translate
 */
private def appendWildCardQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], query: WildcardQuery): Unit = {
  // Alternatives in order: escaped character, lone backslash, `*`, `?`, run of plain characters.
  val wildcardTokens = """\\.|\\|\*|\?|[^\\*?]+""".r
  val searchValue = wildcardTokens
    .findAllIn(query.getTerm.text())
    .map {
      case "*"                                                        => "(.*?)"
      case "?"                                                        => "."
      case escaped if escaped.length == 2 && escaped.startsWith("\\") => java.util.regex.Pattern.quote(escaped.substring(1))
      case literal                                                    => java.util.regex.Pattern.quote(literal)
    }
    .mkString
  val regexQuery = generateRegexQuery(searchValue, "i")
  if (negated) {
    searchMapResponse.put(query.getField, Map("$not" -> regexQuery))
  }
  else {
    searchMapResponse.put(query.getField, regexQuery)
  }
}
/**
 * Translates a phrase query into one case-insensitive "contains" condition per phrase term.
 *
 * Each term is used as literal text: it is quoted with `Pattern.quote` and is not run through the
 * value conversion, so regex metacharacters and date-like terms are matched as typed. Term order
 * and adjacency are not checked; every term only has to occur somewhere in its field.
 *
 * @param negated           whether the phrase is prohibited; the conditions are then stored under `$nor`
 * @param searchMapResponse filter map that receives the `$and` / `$nor` entry
 * @param query             the phrase query to translate
 */
private def appendPhraseQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], query: PhraseQuery): Unit = {
  val listOfSearches = query.getTerms
    .map(term => Map(term.field() -> generateRegexQuery(s"(.*?)${java.util.regex.Pattern.quote(term.text())}(.*?)", "i")))
    .toList
  if (negated) {
    searchMapResponse.put("$nor", listOfSearches)
  }
  else {
    searchMapResponse.put("$and", listOfSearches)
  }
}
/**
 * Builds the operator map of a regular expression condition.
 *
 * @param pattern the regular expression, stored under `$regex`
 * @param options the regular expression options, stored under `$options`
 * @return a two-entry map holding the pattern and its options
 */
private def generateRegexQuery(pattern: String, options: String): Map[String, String] =
  Map(("$regex", pattern), ("$options", options))
/**
 * Converts the text of a query term into the most specific value it represents.
 *
 * Candidates are tried in this order: double, long, boolean, date. A numeric or boolean
 * conversion is only accepted when printing the parsed value reproduces the input exactly
 * (so `12` becomes a long, `12.5` a double, while `0012` stays text). Dates are tried against
 * every pattern in `datePatters`; the first pattern that parses wins. If nothing applies, or a
 * conversion throws (this is what happens for the empty string), the input is returned unchanged.
 *
 * NOTE(review): `SimpleDateFormat.parse` does not have to consume the whole input, so text after
 * the matched part (for example a trailing `Z`) appears to be ignored — confirm this is intended.
 *
 * @param s the raw term text
 * @return the converted value, or `s` itself when no conversion applies
 */
private def checkAndConvertValue(s: String): Serializable = {
  // True when the parsed candidate prints back to exactly the input text.
  def rendersAsInput(parsed: Option[Any]): Boolean = parsed.getOrElse("").toString.equals(s)

  // Attempts a single date pattern; any failure counts as "no match".
  def parseWith(pattern: String): Option[Date] =
    try {
      Option(new SimpleDateFormat(pattern).parse(s))
    }
    catch {
      case _: Exception => None
    }

  try {
    if (rendersAsInput(s.toDoubleOption)) {
      s.toDouble
    }
    else if (rendersAsInput(s.toLongOption)) {
      s.toLong
    }
    else if (rendersAsInput(s.toBooleanOption)) {
      s.toBoolean
    }
    else {
      val firstDate: Option[Date] = datePatters.map(parseWith).find(_.nonEmpty).flatten
      firstDate.getOrElse(s)
    }
  }
  catch {
    case _: Exception => s
  }
}

// `SimpleDateFormat` patterns tried by `checkAndConvertValue` when a term is neither a number nor a
// boolean. The list is searched in order and the first pattern that parses the term is used.
// The first six entries are the compact form (`yyyyMMdd'T'HHmm...`), the last six the dashed and
// colon-separated form; within each group the variants with a zone offset (`ZZ`) come first.
private lazy val datePatters = List(
"yyyyMMdd'T'HHmmssSSSZZ",
"yyyyMMdd'T'HHmmssZZ",
"yyyyMMdd'T'HHmmZZ",
"yyyyMMdd'T'HHmmssSSS",
"yyyyMMdd'T'HHmmss",
"yyyyMMdd'T'HHmm",
"yyyy-MM-dd'T'HH:mm:ss.SSSZZ",
"yyyy-MM-dd'T'HH:mm:ssZZ",
"yyyy-MM-dd'T'HH:mmZZ",
"yyyy-MM-dd'T'HH:mm:ss.SSS",
"yyyy-MM-dd'T'HH:mm:ss",
"yyyy-MM-dd'T'HH:mm"
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package dev.mongocamp.driver.mongodb.lucene
import org.apache.lucene.analysis._
import org.apache.lucene.analysis.standard.StandardTokenizer

import java.io.Reader

/**
 * Analyzer used when parsing Lucene query strings: a `StandardTokenizer` followed by a
 * `StopFilter`. Terms are not normalized, so the text of a query keeps its original case.
 *
 * @param stopWords      words removed by the stop filter; handed to `StopwordAnalyzerBase`, whose
 *                       `stopwords` set the filter reads (empty by default)
 * @param maxTokenLength maximum token length configured on the tokenizer
 */
class MongoCampLuceneAnalyzer(stopWords: CharArraySet = CharArraySet.EMPTY_SET, maxTokenLength: Int = MongoCampLuceneAnalyzer.defaultMaxTokenLength)
    extends StopwordAnalyzerBase(stopWords) {

  /** Builds the tokenizer / stop filter chain for a field. */
  override protected def createComponents(fieldName: String): Analyzer.TokenStreamComponents = {
    val src = new StandardTokenizer
    src.setMaxTokenLength(maxTokenLength)
    // `stopwords` is the set held by the base class, i.e. the constructor's `stopWords`.
    val tok: TokenStream = new StopFilter(src, stopwords)
    new Analyzer.TokenStreamComponents(
      (r: Reader) => {
        // Invoked whenever the components receive a new reader: re-apply the limit, attach the input.
        src.setMaxTokenLength(maxTokenLength)
        src.setReader(r)
      },
      tok
    )
  }

  /** Returns the token stream unchanged; no lower-casing or other normalization is applied. */
  override protected def normalize(fieldName: String, in: TokenStream): TokenStream = in

}

/** Companion holding the defaults of [[MongoCampLuceneAnalyzer]]. */
object MongoCampLuceneAnalyzer {
// Default for the analyzer's `maxTokenLength` constructor parameter.
private val defaultMaxTokenLength: Int = 255
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package dev.mongocamp.driver.mongodb.lucene

import dev.mongocamp.driver.mongodb._
import dev.mongocamp.driver.mongodb.dao.PersonSpecification
import dev.mongocamp.driver.mongodb.test.TestDatabase._

/**
 * Specification for the Lucene-to-MongoDB query conversion: every example parses a Lucene
 * expression, converts it to a filter and checks the persons found through `PersonDAO`.
 */
class LuceneSearchSpec extends PersonSpecification {
  lazy val sortByBalance = Map("balance" -> -1)

  // Parses the expression, converts it to a filter and returns the persons found, sorted with `sortByBalance`.
  private def findByLuceneQuery(queryString: String, defaultField: String) = {
    val luceneQuery = LuceneQueryConverter.parse(queryString, defaultField)
    PersonDAO.find(LuceneQueryConverter.getMongoDbSearch(luceneQuery), sortByBalance).resultList()
  }

  "LuceneSearch" should {

    "search with extended query" in {
      val result = findByLuceneQuery("(favoriteFruit:\"apple\" AND age:\"25\") OR name:*Cecile* AND -active:false AND 123", "id")
      result must haveSize(1)
      result.head.age mustEqual 25
      result.head.name mustEqual "Terra Salinas"
    }

    "between filter for number value" in {
      val result = findByLuceneQuery("[1010 TO 1052.3]", "balance")
      result must haveSize(3)
      result.head.age mustEqual 28
      result.head.name mustEqual "Mason Donaldson"
      result.last.name mustEqual "Nash Dunn"
    }

    "between filter for number value not" in {
      val result = findByLuceneQuery("-[1010 TO 1052.3]", "balance")
      result must haveSize(197)
      result.head.age mustEqual 29
      result.head.balance mustEqual 3996.0
      result.head.name mustEqual "Diaz Jacobs"
    }

    "between filter for date value" in {
      val result = findByLuceneQuery("[2014-04-20T00:00:00Z TO 2014-04-22T23:59:59Z]", "registered")
      result must haveSize(10)
      result.head.age mustEqual 25
      result.head.name mustEqual "Allison Turner"
      result.head.balance mustEqual 3961.0
    }

    "equals Query with Date" in {
      val result = findByLuceneQuery("registered:20140420T004427000+0200", "unbekannt")
      result must haveSize(1)
      result.head.age mustEqual 31
      result.head.name mustEqual "Latasha Mcmillan"
      result.head.balance mustEqual 3403.0
    }

    "wildcard at the end" in {
      val result = findByLuceneQuery("Latasha*", "name")
      result must haveSize(1)
      result.head.age mustEqual 31
      result.head.name mustEqual "Latasha Mcmillan"
      result.head.balance mustEqual 3403.0
    }

    "wildcard at the start" in {
      val result = findByLuceneQuery("*Mcmillan", "name")
      result must haveSize(1)
      result.head.age mustEqual 31
      result.head.name mustEqual "Latasha Mcmillan"
      result.head.balance mustEqual 3403.0
    }

    "not wildcard at the start" in {
      val result = findByLuceneQuery("-name:*Mcmillan", "ube")
      result must haveSize(199)
    }

    "wildcard at the start and end" in {
      val result = findByLuceneQuery("*Mcmil*", "name")
      result must haveSize(1)
      result.head.age mustEqual 31
      result.head.name mustEqual "Latasha Mcmillan"
      result.head.balance mustEqual 3403.0
    }

    "not wildcard at the start and end" in {
      val result = findByLuceneQuery("-name:*Mcmil*", "ube")
      result must haveSize(199)
    }

    "wildcard in the middle" in {
      val result = findByLuceneQuery("\"Latasha *millan\"", "name")
      result must haveSize(1)
      result.head.age mustEqual 31
      result.head.name mustEqual "Latasha Mcmillan"
      result.head.balance mustEqual 3403.0
    }

    "not wildcard in the middle" in {
      val result = findByLuceneQuery("-name:\"Latasha*millan\"", "ube")
      result must haveSize(199)
    }
  }

}

0 comments on commit e56e5d4

Please sign in to comment.