From 6d1c232acdd224d0e84184f3b0fb6812a9d9a4eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Wed, 19 Nov 2025 08:54:09 +0100 Subject: [PATCH 01/40] fix terms agg --- README.md | 10 +++--- .../sql/bridge/ElasticAggregation.scala | 4 +-- .../elastic/sql/SQLQuerySpec.scala | 24 +++++++------- build.sbt | 2 +- .../sql/bridge/ElasticAggregation.scala | 4 +-- .../elastic/sql/SQLQuerySpec.scala | 31 ++++++++++++------- 6 files changed, 41 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index db696f36..bccc96fc 100644 --- a/README.md +++ b/README.md @@ -787,18 +787,18 @@ ThisBuild / resolvers ++= Seq( // For Elasticsearch 6 // Using Jest client -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es6-jest-client" % 0.13.0 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es6-jest-client" % 0.13.1 // Or using Rest High Level client -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es6-rest-client" % 0.13.0 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es6-rest-client" % 0.13.1 // For Elasticsearch 7 -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es7-rest-client" % 0.13.0 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es7-rest-client" % 0.13.1 // For Elasticsearch 8 -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es8-java-client" % 0.13.0 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es8-java-client" % 0.13.1 // For Elasticsearch 9 -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es9-java-client" % 0.13.0 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es9-java-client" % 0.13.1 ``` ### **Quick Example** diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 51b3766b..fa7aad1c 100644 --- 
a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -271,13 +271,13 @@ object ElasticAggregation { var agg = { bucketsDirection.get(bucket.identifier.identifierName) match { case Some(direction) => - termsAgg(bucket.name, s"$currentBucketPath.keyword") + termsAgg(bucket.name, currentBucketPath) .order(Seq(direction match { case Asc => TermsOrder("_key", asc = true) case _ => TermsOrder("_key", asc = false) })) case None => - termsAgg(bucket.name, s"$currentBucketPath.keyword") + termsAgg(bucket.name, currentBucketPath) } } bucket.size.foreach(s => agg = agg.size(s)) diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 8a41ae9d..1a7651b2 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -530,7 +530,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "Country": { | "terms": { - | "field": "Country.keyword", + | "field": "Country", | "exclude": ["USA"], | "order": { | "_key": "asc" @@ -539,7 +539,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "City": { | "terms": { - | "field": "City.keyword", + | "field": "City", | "exclude": ["Berlin"], | "order": { | "cnt": "desc" @@ -793,7 +793,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "cat": { | "terms": { - | "field": "products.category.keyword", + | "field": "products.category", | "size": 10 | }, | "aggs": { @@ -1009,7 +1009,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "userId": { | "terms": { - | "field": "userId.keyword" + | "field": "userId" | }, | "aggs": { | "lastSeen": { @@ -1053,7 +1053,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "Country": { | 
"terms": { - | "field": "Country.keyword", + | "field": "Country", | "exclude": ["USA"], | "order": { | "_key": "asc" @@ -1062,7 +1062,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "City": { | "terms": { - | "field": "City.keyword", + | "field": "City", | "exclude": ["Berlin"] | }, | "aggs": { @@ -1118,7 +1118,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "Country": { | "terms": { - | "field": "Country.keyword", + | "field": "Country", | "exclude": [ | "USA" | ], @@ -1129,7 +1129,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "City": { | "terms": { - | "field": "City.keyword", + | "field": "City", | "exclude": [ | "Berlin" | ] @@ -1193,7 +1193,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "identifier": { | "terms": { - | "field": "identifier.keyword", + | "field": "identifier", | "order": { | "ct": "desc" | } @@ -1360,7 +1360,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "identifier": { | "terms": { - | "field": "identifier.keyword", + | "field": "identifier", | "order": { | "ct": "desc" | } @@ -1517,7 +1517,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "identifier": { | "terms": { - | "field": "identifier.keyword" + | "field": "identifier" | }, | "aggs": { | "max_diff": { @@ -2758,7 +2758,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "dept": { | "terms": { - | "field": "department.keyword" + | "field": "department" | }, | "aggs": { | "cnt": { diff --git a/build.sbt b/build.sbt index 38774a28..cdf10c17 100644 --- a/build.sbt +++ b/build.sbt @@ -19,7 +19,7 @@ ThisBuild / organization := "app.softnetwork" name := "softclient4es" -ThisBuild / version := "0.13.0" +ThisBuild / version := "0.13.1" ThisBuild / scalaVersion := scala213 diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala 
b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index be7d57ed..f1499574 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -268,13 +268,13 @@ object ElasticAggregation { var agg = { bucketsDirection.get(bucket.identifier.identifierName) match { case Some(direction) => - termsAgg(bucket.name, s"$currentBucketPath.keyword") + termsAgg(bucket.name, currentBucketPath) .order(Seq(direction match { case Asc => TermsOrder("_key", asc = true) case _ => TermsOrder("_key", asc = false) })) case None => - termsAgg(bucket.name, s"$currentBucketPath.keyword") + termsAgg(bucket.name, currentBucketPath) } } bucket.size.foreach(s => agg = agg.size(s)) diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index d28b409f..29ffaa51 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -530,7 +530,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "Country": { | "terms": { - | "field": "Country.keyword", + | "field": "Country", | "exclude": "USA", | "order": { | "_key": "asc" @@ -539,7 +539,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "City": { | "terms": { - | "field": "City.keyword", + | "field": "City", | "exclude": "Berlin", | "order": { | "cnt": "desc" @@ -793,7 +793,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "cat": { | "terms": { - | "field": "products.category.keyword", + | "field": "products.category", | "size": 10 | }, | "aggs": { @@ -1009,7 +1009,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "userId": { | "terms": { - | "field": "userId.keyword" + | "field": "userId" | 
}, | "aggs": { | "lastSeen": { @@ -1053,7 +1053,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "Country": { | "terms": { - | "field": "Country.keyword", + | "field": "Country", | "exclude": "USA", | "order": { | "_key": "asc" @@ -1062,7 +1062,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "City": { | "terms": { - | "field": "City.keyword", + | "field": "City", | "exclude": "Berlin" | }, | "aggs": { @@ -1118,7 +1118,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "Country": { | "terms": { - | "field": "Country.keyword", + | "field": "Country", | "exclude": "USA", | "order": { | "_key": "asc" @@ -1127,7 +1127,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "City": { | "terms": { - | "field": "City.keyword", + | "field": "City", | "exclude": "Berlin" | }, | "aggs": { @@ -1189,7 +1189,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "identifier": { | "terms": { - | "field": "identifier.keyword", + | "field": "identifier", | "order": { | "ct": "desc" | } @@ -1356,7 +1356,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "identifier": { | "terms": { - | "field": "identifier.keyword", + | "field": "identifier", | "order": { | "ct": "desc" | } @@ -1513,7 +1513,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "identifier": { | "terms": { - | "field": "identifier.keyword" + | "field": "identifier" | }, | "aggs": { | "max_diff": { @@ -2754,7 +2754,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "aggs": { | "dept": { | "terms": { - | "field": "department.keyword" + | "field": "department" | }, | "aggs": { | "cnt": { @@ -3729,4 +3729,11 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { |}""".stripMargin.replaceAll("\\s+", "") } + it should "test" in { + val query = + """SELECT name FROM users WHERE status = 'active' AND age > 25 + |""".stripMargin + val select: 
ElasticSearchRequest = SQLQuery(query) + println(select.query) + } } From 2181ac26380b0941d888cd0070e2bb7d3643e4f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Wed, 19 Nov 2025 09:06:04 +0100 Subject: [PATCH 02/40] remove .keyword --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bccc96fc..cfbc96a2 100644 --- a/README.md +++ b/README.md @@ -264,13 +264,13 @@ val results = client.search(SQLQuery(sqlQuery)) "aggs": { "restaurant_name": { "terms": { - "field": "restaurant_name.keyword", + "field": "restaurant_name", "size": 1000 }, "aggs": { "restaurant_city": { "terms": { - "field": "restaurant_city.keyword", + "field": "restaurant_city", "size": 1000 }, "aggs": { @@ -296,7 +296,7 @@ val results = client.search(SQLQuery(sqlQuery)) "aggs": { "menu_category": { "terms": { - "field": "menus.category.keyword", + "field": "menus.category", "size": 1000 }, "aggs": { @@ -307,7 +307,7 @@ val results = client.search(SQLQuery(sqlQuery)) "aggs": { "dish_name": { "terms": { - "field": "menus.dishes.name.keyword", + "field": "menus.dishes.name", "size": 1000 }, "aggs": { @@ -339,7 +339,7 @@ val results = client.search(SQLQuery(sqlQuery)) }, "ingredient_name": { "terms": { - "field": "menus.dishes.ingredients.name.keyword", + "field": "menus.dishes.ingredients.name", "size": 1000 }, "aggs": { From 9db439f05c3abd0a17a0a0eeaa4d440848bbf395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Fri, 21 Nov 2025 11:55:52 +0100 Subject: [PATCH 03/40] fix fields duplicates --- .../softnetwork/elastic/sql/bridge/ElasticAggregation.scala | 4 ++++ .../softnetwork/elastic/sql/bridge/ElasticAggregation.scala | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index fa7aad1c..5b144ee9 100644 --- 
a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -167,13 +167,17 @@ object ElasticAggregation { .fetchSource( th.identifier.name +: th.fields .filterNot(_.isScriptField) + .filterNot(_.sourceField == th.identifier.name) .map(_.sourceField) + .distinct .toArray, Array.empty ) .copy( scripts = th.fields .filter(_.isScriptField) + .groupBy(_.sourceField) + .map(_._2.head) .map(f => f.sourceField -> Script(f.painless(None)).lang("painless")) .toMap ) diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index f1499574..004e1558 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -167,13 +167,17 @@ object ElasticAggregation { .fetchSource( th.identifier.name +: th.fields .filterNot(_.isScriptField) + .filterNot(_.sourceField == th.identifier.name) .map(_.sourceField) + .distinct .toArray, Array.empty ) .copy( scripts = th.fields .filter(_.isScriptField) + .groupBy(_.sourceField) + .map(_._2.head) .map(f => f.sourceField -> Script(f.painless(None)).lang("painless")) .toMap ) From c39d9b9491dd8ace74132e72ba74c052dd2f067a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Fri, 21 Nov 2025 11:56:59 +0100 Subject: [PATCH 04/40] minor refactoring for window functions --- .../sql/bridge/ElasticAggregation.scala | 10 ++-- .../sql/bridge/ElasticAggregation.scala | 10 ++-- .../sql/function/aggregate/package.scala | 50 +++++++++---------- .../elastic/sql/parser/SelectParser.scala | 2 +- .../parser/function/aggregate/package.scala | 8 +-- .../elastic/sql/query/SQLSearchRequest.scala | 13 ++--- .../elastic/sql/query/Select.scala | 20 ++++---- 7 files changed, 57 
insertions(+), 56 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 5b144ee9..92d81665 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -113,8 +113,8 @@ object ElasticAggregation { s"${aggType}_distinct_${sourceField.replace(".", "_")}" else { aggType match { - case th: TopHitsAggregation => - s"${th.topHits.sql.toLowerCase}_${sourceField.replace(".", "_")}" + case th: WindowFunction => + s"${th.window.sql.toLowerCase}_${sourceField.replace(".", "_")}" case _ => s"${aggType}_${sourceField.replace(".", "_")}" @@ -154,7 +154,7 @@ object ElasticAggregation { case MAX => aggWithFieldOrScript(maxAgg, (name, s) => maxAgg(name, sourceField).script(s)) case AVG => aggWithFieldOrScript(avgAgg, (name, s) => avgAgg(name, sourceField).script(s)) case SUM => aggWithFieldOrScript(sumAgg, (name, s) => sumAgg(name, sourceField).script(s)) - case th: TopHitsAggregation => + case th: WindowFunction => val limit = { th match { case _: LastValue => 1 @@ -184,12 +184,12 @@ object ElasticAggregation { .size(limit) sortBy th.orderBy.sorts.map(sort => sort.order match { case Some(Desc) => - th.topHits match { + th.window match { case LAST_VALUE => FieldSort(sort.field).asc() case _ => FieldSort(sort.field).desc() } case _ => - th.topHits match { + th.window match { case LAST_VALUE => FieldSort(sort.field).desc() case _ => FieldSort(sort.field).asc() } diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 004e1558..7a046ada 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ 
b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -113,8 +113,8 @@ object ElasticAggregation { s"${aggType}_distinct_${sourceField.replace(".", "_")}" else { aggType match { - case th: TopHitsAggregation => - s"${th.topHits.sql.toLowerCase}_${sourceField.replace(".", "_")}" + case th: WindowFunction => + s"${th.window.sql.toLowerCase}_${sourceField.replace(".", "_")}" case _ => s"${aggType}_${sourceField.replace(".", "_")}" @@ -154,7 +154,7 @@ object ElasticAggregation { case MAX => aggWithFieldOrScript(maxAgg, (name, s) => maxAgg(name, sourceField).script(s)) case AVG => aggWithFieldOrScript(avgAgg, (name, s) => avgAgg(name, sourceField).script(s)) case SUM => aggWithFieldOrScript(sumAgg, (name, s) => sumAgg(name, sourceField).script(s)) - case th: TopHitsAggregation => + case th: WindowFunction => val limit = { th match { case _: LastValue => 1 @@ -184,12 +184,12 @@ object ElasticAggregation { .size(limit) sortBy th.orderBy.sorts.map(sort => sort.order match { case Some(Desc) => - th.topHits match { + th.window match { case LAST_VALUE => FieldSort(sort.field).asc() case _ => FieldSort(sort.field).desc() } case _ => - th.topHits match { + th.window match { case LAST_VALUE => FieldSort(sort.field).desc() case _ => FieldSort(sort.field).asc() } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index 68611f9f..ad61c9d1 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -35,17 +35,17 @@ package object aggregate { case object SUM extends Expr("SUM") with AggregateFunction - sealed trait TopHits extends TokenRegex + sealed trait Window extends TokenRegex - case object FIRST_VALUE extends Expr("FIRST_VALUE") with TopHits { + case object FIRST_VALUE extends Expr("FIRST_VALUE") 
with Window { override val words: List[String] = List(sql, "FIRST") } - case object LAST_VALUE extends Expr("LAST_VALUE") with TopHits { + case object LAST_VALUE extends Expr("LAST_VALUE") with Window { override val words: List[String] = List(sql, "LAST") } - case object ARRAY_AGG extends Expr("ARRAY_AGG") with TopHits { + case object ARRAY_AGG extends Expr("ARRAY_AGG") with Window { override val words: List[String] = List(sql, "ARRAY") } @@ -53,14 +53,14 @@ package object aggregate { case object PARTITION_BY extends Expr("PARTITION BY") with TokenRegex - sealed trait TopHitsAggregation + sealed trait WindowFunction extends AggregateFunction with FunctionWithIdentifier with Updateable { def partitionBy: Seq[Identifier] - def withPartitionBy(partitionBy: Seq[Identifier]): TopHitsAggregation + def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction def orderBy: OrderBy - def topHits: TopHits + def window: Window def limit: Option[Limit] lazy val buckets: Seq[Bucket] = partitionBy.map(identifier => Bucket(identifier, None)) @@ -73,16 +73,16 @@ package object aggregate { val partitionByStr = if (partitionBy.nonEmpty) s"$PARTITION_BY ${partitionBy.mkString(", ")}" else "" - s"$topHits($identifier) $OVER ($partitionByStr$orderBy)" + s"$window($identifier) $OVER ($partitionByStr$orderBy)" } override def toSQL(base: String): String = sql def fields: Seq[Field] - def withFields(fields: Seq[Field]): TopHitsAggregation + def withFields(fields: Seq[Field]): WindowFunction - def update(request: SQLSearchRequest): TopHitsAggregation = { + def update(request: SQLSearchRequest): WindowFunction = { val updated = this .withPartitionBy(partitionBy = partitionBy.map(_.update(request))) updated.withFields( @@ -101,13 +101,13 @@ package object aggregate { partitionBy: Seq[Identifier] = Seq.empty, orderBy: OrderBy, fields: Seq[Field] = Seq.empty - ) extends TopHitsAggregation { + ) extends WindowFunction { override def limit: Option[Limit] = Some(Limit(1, None)) - override def 
topHits: TopHits = FIRST_VALUE - override def withPartitionBy(partitionBy: Seq[Identifier]): TopHitsAggregation = + override def window: Window = FIRST_VALUE + override def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction = this.copy(partitionBy = partitionBy) - override def withFields(fields: Seq[Field]): TopHitsAggregation = this.copy(fields = fields) - override def update(request: SQLSearchRequest): TopHitsAggregation = super + override def withFields(fields: Seq[Field]): WindowFunction = this.copy(fields = fields) + override def update(request: SQLSearchRequest): WindowFunction = super .update(request) .asInstanceOf[FirstValue] .copy( @@ -121,13 +121,13 @@ package object aggregate { partitionBy: Seq[Identifier] = Seq.empty, orderBy: OrderBy, fields: Seq[Field] = Seq.empty - ) extends TopHitsAggregation { + ) extends WindowFunction { override def limit: Option[Limit] = Some(Limit(1, None)) - override def topHits: TopHits = LAST_VALUE - override def withPartitionBy(partitionBy: Seq[Identifier]): TopHitsAggregation = + override def window: Window = LAST_VALUE + override def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction = this.copy(partitionBy = partitionBy) - override def withFields(fields: Seq[Field]): TopHitsAggregation = this.copy(fields = fields) - override def update(request: SQLSearchRequest): TopHitsAggregation = super + override def withFields(fields: Seq[Field]): WindowFunction = this.copy(fields = fields) + override def update(request: SQLSearchRequest): WindowFunction = super .update(request) .asInstanceOf[LastValue] .copy( @@ -142,12 +142,12 @@ package object aggregate { orderBy: OrderBy, fields: Seq[Field] = Seq.empty, limit: Option[Limit] = None - ) extends TopHitsAggregation { - override def topHits: TopHits = ARRAY_AGG - override def withPartitionBy(partitionBy: Seq[Identifier]): TopHitsAggregation = + ) extends WindowFunction { + override def window: Window = ARRAY_AGG + override def withPartitionBy(partitionBy: 
Seq[Identifier]): WindowFunction = this.copy(partitionBy = partitionBy) - override def withFields(fields: Seq[Field]): TopHitsAggregation = this - override def update(request: SQLSearchRequest): TopHitsAggregation = super + override def withFields(fields: Seq[Field]): WindowFunction = this + override def update(request: SQLSearchRequest): WindowFunction = super .update(request) .asInstanceOf[ArrayAgg] .copy( diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala index 752fcaa8..92ead92d 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala @@ -22,7 +22,7 @@ trait SelectParser { self: Parser with WhereParser => def field: PackratParser[Field] = - (identifierWithTopHits | + (identifierWithWindowFunction | identifierWithArithmeticExpression | identifierWithTransformation | identifierWithAggregation | diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala index 8a962efd..e815d1ad 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala @@ -59,22 +59,22 @@ package object aggregate { } } - def first_value: PackratParser[TopHitsAggregation] = + def first_value: PackratParser[WindowFunction] = FIRST_VALUE.regex ~ top_hits ^^ { case _ ~ top => FirstValue(top._1, top._2, top._3) } - def last_value: PackratParser[TopHitsAggregation] = + def last_value: PackratParser[WindowFunction] = LAST_VALUE.regex ~ top_hits ^^ { case _ ~ top => LastValue(top._1, top._2, top._3) } - def array_agg: PackratParser[TopHitsAggregation] = + def array_agg: PackratParser[WindowFunction] = ARRAY_AGG.regex ~ top_hits ^^ { 
case _ ~ top => ArrayAgg(top._1, top._2, top._3, limit = None) } - def identifierWithTopHits: PackratParser[Identifier] = + def identifierWithWindowFunction: PackratParser[Identifier] = (first_value | last_value | array_agg) ^^ { th => th.identifier.withFunctions(th +: th.identifier.functions) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala index 15c174b4..2a74dbf4 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala @@ -16,7 +16,7 @@ package app.softnetwork.elastic.sql.query -import app.softnetwork.elastic.sql.function.aggregate.TopHitsAggregation +import app.softnetwork.elastic.sql.function.aggregate.WindowFunction import app.softnetwork.elastic.sql.{asString, Token} case class SQLSearchRequest( @@ -123,16 +123,17 @@ case class SQLSearchRequest( .filterNot(_.nested) .map(_.sourceField) .filterNot(f => excludes.contains(f)) + .distinct else Seq.empty } - lazy val topHitsFields: Seq[Field] = select.fields.filter(_.topHits.nonEmpty) + lazy val windowFields: Seq[Field] = select.fields.filter(_.windows.nonEmpty) - lazy val topHitsAggs: Seq[TopHitsAggregation] = topHitsFields.flatMap(_.topHits) + lazy val windowFunctions: Seq[WindowFunction] = windowFields.flatMap(_.windows) lazy val aggregates: Seq[Field] = - select.fields.filter(_.aggregation).filterNot(_.topHits.isDefined) ++ topHitsFields + select.fields.filter(_.aggregation).filterNot(_.windows.isDefined) ++ windowFields lazy val sqlAggregations: Map[String, SQLAggregation] = aggregates.flatMap(f => SQLAggregation.fromField(f, this)).map(a => a.aggName -> a).toMap @@ -141,7 +142,7 @@ case class SQLSearchRequest( lazy val sources: Seq[String] = from.tables.map(_.name) - lazy val topHitsBuckets: Seq[Bucket] = topHitsAggs + lazy val windowBuckets: Seq[Bucket] = windowFunctions 
.flatMap(_.bucketNames) .filterNot(bucket => groupBy.map(_.bucketNames).getOrElse(Map.empty).keys.toSeq.contains(bucket._1) @@ -150,7 +151,7 @@ case class SQLSearchRequest( .values .toSeq - lazy val buckets: Seq[Bucket] = groupBy.map(_.buckets).getOrElse(Seq.empty) ++ topHitsBuckets + lazy val buckets: Seq[Bucket] = groupBy.map(_.buckets).getOrElse(Seq.empty) ++ windowBuckets override def validate(): Either[String, Unit] = { for { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index 6499405a..806ce4d7 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -16,7 +16,7 @@ package app.softnetwork.elastic.sql.query -import app.softnetwork.elastic.sql.function.aggregate.{AggregateFunction, TopHitsAggregation} +import app.softnetwork.elastic.sql.function.aggregate.{AggregateFunction, WindowFunction} import app.softnetwork.elastic.sql.function.{Function, FunctionChain, FunctionUtils} import app.softnetwork.elastic.sql.{ asString, @@ -64,19 +64,19 @@ case class Field( override def functions: List[Function] = identifier.functions - lazy val topHits: Option[TopHitsAggregation] = - functions.collectFirst { case th: TopHitsAggregation => th } + lazy val windows: Option[WindowFunction] = + functions.collectFirst { case th: WindowFunction => th } def update(request: SQLSearchRequest): Field = { - topHits match { + windows match { case Some(th) => - val topHitsAggregation = th.update(request) - val identifier = topHitsAggregation.identifier + val windowFunction = th.update(request) + val identifier = windowFunction.identifier identifier.functions match { case _ :: tail => - this.copy(identifier = identifier.withFunctions(functions = topHitsAggregation +: tail)) + this.copy(identifier = identifier.withFunctions(functions = windowFunction +: tail)) case _ => - this.copy(identifier = 
identifier.withFunctions(functions = List(topHitsAggregation))) + this.copy(identifier = identifier.withFunctions(functions = List(windowFunction))) } case None => this.copy(identifier = identifier.update(request)) } @@ -162,8 +162,8 @@ object SQLAggregation { s"${aggType}_distinct_${sourceField.replace(".", "_")}" else { aggType match { - case th: TopHitsAggregation => - s"${th.topHits.sql.toLowerCase}_${sourceField.replace(".", "_")}" + case th: WindowFunction => + s"${th.window.sql.toLowerCase}_${sourceField.replace(".", "_")}" case _ => s"${aggType}_${sourceField.replace(".", "_")}" From 4b421ff3f752a83796a090fde050ae94774b9af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Sat, 22 Nov 2025 10:16:24 +0100 Subject: [PATCH 05/40] fix macros for COUNT(*), update ElasticResponse adding optional SQL and updating results as rows --- .../elastic/client/AggregateApi.scala | 80 +++---- .../elastic/client/ElasticConversion.scala | 16 +- .../elastic/client/SearchApi.scala | 204 +++++++++++++----- .../softnetwork/elastic/client/package.scala | 28 ++- .../elastic/client/scroll/package.scala | 3 +- .../client/ElasticConversionSpec.scala | 48 ++--- .../elastic/client/jest/JestScrollApi.scala | 17 +- .../client/rest/RestHighLevelClientApi.scala | 18 +- .../client/rest/RestHighLevelClientApi.scala | 16 +- .../elastic/client/java/JavaClientApi.scala | 16 +- .../elastic/client/java/JavaClientApi.scala | 17 +- .../sql/macros/SQLQueryValidator.scala | 8 +- .../elastic/sql/query/SQLSearchRequest.scala | 2 + 13 files changed, 284 insertions(+), 189 deletions(-) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/AggregateApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/AggregateApi.scala index fb4dddb6..caa3325c 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/AggregateApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/AggregateApi.scala @@ -130,59 +130,41 @@ trait SingleValueAggregateApi // Execute 
the search search(sqlQuery) .flatMap { response => - // Parse the response - val parseResult = ElasticResult.fromTry(parseResponse(response)) - - parseResult match { - // Case 1: Parse successful - process the results - case ElasticSuccess(results) => - val aggregationResults = results.flatMap { result => - response.aggregations.map { case (name, aggregation) => - // Attempt to process each aggregation - val aggregationResult = ElasticResult.attempt { - val value = findAggregation(name, result).orNull match { - case b: Boolean => BooleanValue(b) - case n: Number => NumericValue(n) - case s: String => StringValue(s) - case t: Temporal => TemporalValue(t) - case m: Map[_, Any] => ObjectValue(m.map(kv => kv._1.toString -> kv._2)) - case s: Seq[_] if aggregation.multivalued => - getAggregateValue(s, aggregation.distinct) - case _ => EmptyValue - } - - SingleValueAggregateResult(name, aggregation.aggType, value) - } - - // Convert failures to results with errors - aggregationResult match { - case ElasticSuccess(result) => result - case ElasticFailure(error) => - SingleValueAggregateResult( - name, - aggregation.aggType, - EmptyValue, - error = Some(s"Failed to process aggregation: ${error.message}") - ) - } - }.toSeq + val results = response.results + val aggregationResults = results.flatMap { result => + response.aggregations.map { case (name, aggregation) => + // Attempt to process each aggregation + val aggregationResult = ElasticResult.attempt { + val value = findAggregation(name, result).orNull match { + case b: Boolean => BooleanValue(b) + case n: Number => NumericValue(n) + case s: String => StringValue(s) + case t: Temporal => TemporalValue(t) + case m: Map[_, Any] => ObjectValue(m.map(kv => kv._1.toString -> kv._2)) + case s: Seq[_] if aggregation.multivalued => + getAggregateValue(s, aggregation.distinct) + case _ => EmptyValue + } + + SingleValueAggregateResult(name, aggregation.aggType, value) } - ElasticResult.success(aggregationResults) + // Convert 
failures to results with errors + aggregationResult match { + case ElasticSuccess(result) => result + case ElasticFailure(error) => + SingleValueAggregateResult( + name, + aggregation.aggType, + EmptyValue, + error = Some(s"Failed to process aggregation: ${error.message}") + ) + } + }.toSeq + } - // Case 2: Parse failed - returning empty results with errors - case ElasticFailure(error) => - val errorResults = response.aggregations.map { case (name, aggregation) => - SingleValueAggregateResult( - name, - aggregation.aggType, - EmptyValue, - error = Some(s"Parse error: ${error.message}") - ) - }.toSeq + ElasticResult.success(aggregationResults) - ElasticResult.success(errorResults) - } } .fold( // If search() fails, throw an exception diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala index c69b085a..785882a9 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala @@ -44,11 +44,11 @@ trait ElasticConversion { m: Manifest[T], formats: Formats ): Try[Seq[T]] = { - parseResponse(response).map { rows => - rows.map { row => + Try( + response.results.map { row => convertTo[T](row)(m, formats) } - } + ) } // Formatters for elasticsearch ISO 8601 date/time strings @@ -60,15 +60,17 @@ trait ElasticConversion { * multi-search (msearch/UNION ALL) responses */ def parseResponse( - response: ElasticResponse + results: String, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] ): Try[Seq[Map[String, Any]]] = { - val json = mapper.readTree(response.results) + val json = mapper.readTree(results) // Check if it's a multi-search response (array of responses) if (json.isArray) { - parseMultiSearchResponse(json, response.fieldAliases, response.aggregations) + parseMultiSearchResponse(json, fieldAliases, aggregations) } else { // 
Single search response - parseSingleSearchResponse(json, response.fieldAliases, response.aggregations) + parseSingleSearchResponse(json, fieldAliases, aggregations) } } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala index e2d460af..a75c75ec 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -65,7 +65,8 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { case Some(Left(single)) => val elasticQuery = ElasticQuery( single, - collection.immutable.Seq(single.sources: _*) + collection.immutable.Seq(single.sources: _*), + sql = Some(sql.query) ) singleSearch(elasticQuery, single.fieldAliases, single.sqlAggregations) @@ -76,17 +77,18 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { query, collection.immutable.Seq(query.sources: _*) ) - }.toList + }.toList, + sql = Some(sql.query) ) multiSearch(elasticQueries, multiple.fieldAliases, multiple.sqlAggregations) case None => logger.error( - s"❌ Failed to execute search for query '${sql.query}'" + s"❌ Failed to execute search for query \n${sql.query}" ) ElasticResult.failure( ElasticError( - message = s"SQL query does not contain a valid search request: ${sql.query}", + message = s"SQL query does not contain a valid search request\n${sql.query}", operation = Some("search") ) ) @@ -122,36 +124,60 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { case None => // continue } + val sql = elasticQuery.sql + val query = elasticQuery.query + val indices = elasticQuery.indices.mkString(",") + logger.debug( - s"Searching with query '${elasticQuery.query}' in indices '${elasticQuery.indices.mkString(",")}'" + s"Searching with query \n${sql.getOrElse(query)}\nin indices '$indices'" ) executeSingleSearch(elasticQuery) match { case ElasticSuccess(Some(response)) => 
logger.info( - s"✅ Successfully executed search in indices '${elasticQuery.indices.mkString(",")}'" - ) - ElasticResult.success( - ElasticResponse( - elasticQuery.query, - response, - fieldAliases, - aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) - ) + s"✅ Successfully executed search for query \n${sql.getOrElse(query)}\nin indices '$indices'" ) + val aggs = aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + ElasticResult.fromTry(parseResponse(response, fieldAliases, aggs)) match { + case success @ ElasticSuccess(_) => + logger.info( + s"✅ Successfully parsed search results for query \n${sql.getOrElse(query)}\nin indices '$indices'" + ) + ElasticResult.success( + ElasticResponse( + sql, + query, + success.value, + fieldAliases, + aggs + ) + ) + case ElasticFailure(error) => + logger.error( + s"❌ Failed to parse search results for query \n${sql + .getOrElse(query)}\nin indices '$indices' -> ${error.message}" + ) + ElasticResult.failure( + error.copy( + operation = Some("search"), + index = Some(elasticQuery.indices.mkString(",")) + ) + ) + } case ElasticSuccess(_) => val error = ElasticError( message = - s"Failed to execute search in indices '${elasticQuery.indices.mkString(",")}'", - index = Some(elasticQuery.indices.mkString(",")), + s"Failed to execute search for query \n${sql.getOrElse(query)}\nin indices '$indices'", + index = Some(indices), operation = Some("search") ) logger.error(s"❌ ${error.message}") ElasticResult.failure(error) case ElasticFailure(error) => logger.error( - s"❌ Failed to execute search in indices '${elasticQuery.indices.mkString(",")}': ${error.message}" + s"❌ Failed to execute search for query \n${sql + .getOrElse(query)}\nin indices '$indices' -> ${error.message}" ) ElasticResult.failure( error.copy( @@ -196,34 +222,56 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { ) } + val query = elasticQueries.queries.map(_.query).mkString("\n") + val sql = 
elasticQueries.sql.orElse( + Option(elasticQueries.queries.flatMap(_.sql).mkString("\nUNION ALL\n")) + ) + logger.debug( - s"Multi-searching with ${elasticQueries.queries.size} queries" + s"Multi-searching with query \n${sql.getOrElse(query)}" ) executeMultiSearch(elasticQueries) match { case ElasticSuccess(Some(response)) => logger.info( - s"✅ Successfully executed multi-search with ${elasticQueries.queries.size} queries" - ) - ElasticResult.success( - ElasticResponse( - elasticQueries.queries.map(_.query).mkString("\n"), - response, - fieldAliases, - aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) - ) + s"✅ Successfully executed multi-search for query \n${sql.getOrElse(query)}" ) + val aggs = aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + ElasticResult.fromTry(parseResponse(response, fieldAliases, aggs)) match { + case success @ ElasticSuccess(_) => + logger.info( + s"✅ Successfully parsed multi-search results for query '${sql.getOrElse(query)}'" + ) + ElasticResult.success( + ElasticResponse( + sql, + query, + success.value, + fieldAliases, + aggs + ) + ) + case ElasticFailure(error) => + logger.error( + s"❌ Failed to parse multi-search results for query \n${sql.getOrElse(query)}\n -> ${error.message}" + ) + ElasticResult.failure( + error.copy( + operation = Some("multiSearch") + ) + ) + } case ElasticSuccess(_) => val error = ElasticError( - message = s"Failed to execute multi-search with ${elasticQueries.queries.size} queries", + message = s"Failed to execute multi-search for query \n${sql.getOrElse(query)}", operation = Some("multiSearch") ) logger.error(s"❌ ${error.message}") ElasticResult.failure(error) case ElasticFailure(error) => logger.error( - s"❌ Failed to execute multi-search with ${elasticQueries.queries.size} queries: ${error.message}" + s"❌ Failed to execute multi-search for query \n${sql.getOrElse(query)}\n -> ${error.message}" ) ElasticResult.failure( error.copy( @@ -301,25 +349,50 @@ trait 
SearchApi extends ElasticConversion with ElasticClientHelpers { )(implicit ec: ExecutionContext ): Future[ElasticResult[ElasticResponse]] = { + val sql = elasticQuery.sql + val query = elasticQuery.query + val indices = elasticQuery.indices.mkString(",") executeSingleSearchAsync(elasticQuery).flatMap { case ElasticSuccess(Some(response)) => logger.info( - s"✅ Successfully executed asynchronous search for query '${elasticQuery.query}'" + s"✅ Successfully executed asynchronous search for query \n${sql.getOrElse(query)}\nin indices '$indices'" ) - Future.successful( - ElasticResult.success( - ElasticResponse( - elasticQuery.query, - response, - fieldAliases, - aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + val aggs = aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + ElasticResult.fromTry(parseResponse(response, fieldAliases, aggs)) match { + case success @ ElasticSuccess(_) => + logger.info( + s"✅ Successfully parsed search results for query \n${sql.getOrElse(query)}\nin indices '$indices'" ) - ) - ) + Future.successful( + ElasticResult.success( + ElasticResponse( + sql, + query, + success.value, + fieldAliases, + aggs + ) + ) + ) + case ElasticFailure(error) => + logger.error( + s"❌ Failed to parse search results for query \n${sql + .getOrElse(query)}\nin indices '$indices' -> ${error.message}" + ) + Future.successful( + ElasticResult.failure( + error.copy( + operation = Some("searchAsync"), + index = Some(indices) + ) + ) + ) + } case ElasticSuccess(_) => val error = ElasticError( - message = s"Failed to execute asynchronous search for query '${elasticQuery.query}'", + message = + s"Failed to execute asynchronous search for query \n${sql.getOrElse(query)}\nin indices '$indices'", index = Some(elasticQuery.indices.mkString(",")), operation = Some("searchAsync") ) @@ -327,7 +400,8 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { Future.successful(ElasticResult.failure(error)) case 
ElasticFailure(error) => logger.error( - s"❌ Failed to execute asynchronous search for query '${elasticQuery.query}': ${error.message}" + s"❌ Failed to execute asynchronous search for query \n${sql + .getOrElse(query)}\nin indices '$indices' -> ${error.message}" ) Future.successful( ElasticResult.failure( @@ -358,33 +432,57 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { )(implicit ec: ExecutionContext ): Future[ElasticResult[ElasticResponse]] = { + val query = elasticQueries.queries.map(_.query).mkString("\n") + val sql = elasticQueries.sql.orElse( + Option(elasticQueries.queries.flatMap(_.sql).mkString("\nUNION ALL\n")) + ) + executeMultiSearchAsync(elasticQueries).flatMap { case ElasticSuccess(Some(response)) => logger.info( - s"✅ Successfully executed asynchronous multi-search with ${elasticQueries.queries.size} queries" + s"✅ Successfully executed asynchronous multi-search for query \n${sql.getOrElse(query)}" ) - Future.successful( - ElasticResult.success( - ElasticResponse( - elasticQueries.queries.map(_.query).mkString("\n"), - response, - fieldAliases, - aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + val aggs = aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + ElasticResult.fromTry(parseResponse(response, fieldAliases, aggs)) match { + case success @ ElasticSuccess(_) => + logger.info( + s"✅ Successfully parsed multi-search results for query '${sql.getOrElse(query)}'" ) - ) - ) + Future.successful( + ElasticResult.success( + ElasticResponse( + sql, + query, + success.value, + fieldAliases, + aggs + ) + ) + ) + case ElasticFailure(error) => + logger.error( + s"❌ Failed to parse multi-search results for query \n${sql.getOrElse(query)}\n -> ${error.message}" + ) + Future.successful( + ElasticResult.failure( + error.copy( + operation = Some("multiSearchAsync") + ) + ) + ) + } case ElasticSuccess(_) => val error = ElasticError( message = - s"Failed to execute asynchronous multi-search with 
${elasticQueries.queries.size} queries", + s"Failed to execute asynchronous multi-search for query \n${sql.getOrElse(query)}", operation = Some("multiSearchAsync") ) logger.error(s"❌ ${error.message}") Future.successful(ElasticResult.failure(error)) case ElasticFailure(error) => logger.error( - s"❌ Failed to execute asynchronous multi-search with ${elasticQueries.queries.size} queries: ${error.message}" + s"❌ Failed to execute asynchronous multi-search for query \n${sql.getOrElse(query)}\n -> ${error.message}" ) Future.successful( ElasticResult.failure( diff --git a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index 412ca2ac..c14ea716 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -39,18 +39,21 @@ package object client extends SerializationApi { type JSONResults = String /** Elastic response case class + * @param sql + * - the SQL query if any * @param query * - the JSON query * @param results - * - the JSON results + * - the results as a sequence of rows * @param fieldAliases * - the field aliases used * @param aggregations * - the aggregations expected */ case class ElasticResponse( + sql: Option[String] = None, query: JSONQuery, - results: JSONResults, + results: Seq[Map[String, Any]], fieldAliases: Map[String, String], aggregations: Map[String, ClientAggregation] ) @@ -69,9 +72,14 @@ package object client extends SerializationApi { * @param types * - the target types @deprecated types are deprecated in ES 7+ */ - case class ElasticQuery(query: JSONQuery, indices: Seq[String], types: Seq[String] = Seq.empty) + case class ElasticQuery( + query: JSONQuery, + indices: Seq[String], + types: Seq[String] = Seq.empty, + sql: Option[String] = None + ) - case class ElasticQueries(queries: List[ElasticQuery]) + case class ElasticQueries(queries: List[ElasticQuery], sql: 
Option[String] = None) /** Retry configuration */ @@ -137,7 +145,9 @@ package object client extends SerializationApi { case class ClientAggregation( aggName: String, aggType: AggregationType.AggregationType, - distinct: Boolean + distinct: Boolean, + sourceField: String, + window: Boolean ) { def multivalued: Boolean = aggType == AggregationType.ArrayAgg def singleValued: Boolean = !multivalued @@ -155,6 +165,12 @@ package object client extends SerializationApi { case _: ArrayAgg => AggregationType.ArrayAgg case _ => throw new IllegalArgumentException(s"Unsupported aggregation type: ${agg.aggType}") } - ClientAggregation(agg.aggName, aggType, agg.distinct) + ClientAggregation( + agg.aggName, + aggType, + agg.distinct, + agg.sourceField, + agg.aggType.isInstanceOf[WindowFunction] + ) } } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/scroll/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/scroll/package.scala index 36640ea0..72e2989b 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/scroll/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/scroll/package.scala @@ -27,7 +27,8 @@ package object scroll { maxDocuments: Option[Long] = None, // Optional maximum number of documents to retrieve preferSearchAfter: Boolean = true, // Prefer search_after over scroll when possible metrics: ScrollMetrics = ScrollMetrics(), // Initial scroll metrics - retryConfig: RetryConfig = RetryConfig() // Retry configuration + retryConfig: RetryConfig = RetryConfig(), // Retry configuration + failOnWindowError: Option[Boolean] = None ) /** Scroll strategy based on query type diff --git a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala index 02a43f0d..620a403c 100644 --- a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala +++ 
b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala @@ -51,7 +51,7 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver | } |}""".stripMargin - parseResponse(ElasticResponse("", results, Map.empty, Map.empty)) match { + parseResponse(results, Map.empty, Map.empty) match { case Success(rows) => rows.foreach(println) // Map(name -> Laptop, price -> 999.99, category -> Electronics, tags -> List(computer, portable), _id -> 1, _index -> products, _score -> 1.0) @@ -86,7 +86,9 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver | } |}""".stripMargin parseResponse( - ElasticResponse("", results, Map.empty, Map.empty) + results, + Map.empty, + Map.empty ) match { case Success(rows) => rows.foreach(println) @@ -180,16 +182,15 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver |}""".stripMargin parseResponse( - ElasticResponse( - "", - results, - Map.empty, - Map( - "top_products" -> ClientAggregation( - "top_products", - aggType = AggregationType.ArrayAgg, - distinct = false - ) + results, + Map.empty, + Map( + "top_products" -> ClientAggregation( + "top_products", + aggType = AggregationType.ArrayAgg, + distinct = false, + "name", + window = true ) ) ) match { @@ -287,7 +288,7 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver | } | } |}""".stripMargin - parseResponse(ElasticResponse("", results, Map.empty, Map.empty)) match { + parseResponse(results, Map.empty, Map.empty) match { case Success(rows) => rows.foreach(println) // Map(country -> France, country_doc_count -> 100, city -> Paris, city_doc_count -> 60, product -> Laptop, product_doc_count -> 30, total_sales -> 29997.0, avg_price -> 999.9) @@ -334,7 +335,7 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver | } | } |}""".stripMargin - parseResponse(ElasticResponse("", results, Map.empty, Map.empty)) match { + 
parseResponse(results, Map.empty, Map.empty) match { case Success(rows) => rows.foreach(println) // Map(date -> 2024-01-01T00:00:00.000Z, doc_count -> 100, total_sales -> 50000.0) @@ -634,16 +635,15 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver |}""".stripMargin parseResponse( - ElasticResponse( - "", - results, - Map.empty, - Map( - "employees" -> ClientAggregation( - aggName = "employees", - aggType = AggregationType.ArrayAgg, - distinct = false - ) + results, + Map.empty, + Map( + "employees" -> ClientAggregation( + aggName = "employees", + aggType = AggregationType.ArrayAgg, + distinct = false, + "name", + window = true ) ) ) match { diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestScrollApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestScrollApi.scala index 5d036251..af2cf47f 100644 --- a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestScrollApi.scala +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestScrollApi.scala @@ -263,15 +263,11 @@ trait JestScrollApi extends ScrollApi with JestClientHelpers { aggregations: Map[String, SQLAggregation] ): Seq[Map[String, Any]] = { val jsonString = jsonObject.toString - val sqlResponse = - ElasticResponse( - "", - jsonString, - fieldAliases, - aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) - ) - - parseResponse(sqlResponse) match { + parseResponse( + jsonString, + fieldAliases, + aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + ) match { case Success(rows) => rows case Failure(ex) => logger.error(s"Failed to parse Jest scroll response: ${ex.getMessage}", ex) @@ -286,9 +282,8 @@ trait JestScrollApi extends ScrollApi with JestClientHelpers { fieldAliases: Map[String, String] ): Seq[Map[String, Any]] = { val jsonString = jsonObject.toString - val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) - parseResponse(sqlResponse) match { + 
parseResponse(jsonString, fieldAliases, Map.empty) match { case Success(rows) => rows case Failure(ex) => logger.error(s"Failed to parse Jest search after response: ${ex.getMessage}", ex) diff --git a/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala b/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala index a22aaf8f..9758537f 100644 --- a/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala +++ b/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala @@ -1345,15 +1345,11 @@ trait RestHighLevelClientScrollApi extends ScrollApi with RestHighLevelClientHel aggregations: Map[String, SQLAggregation] ): Seq[Map[String, Any]] = { val jsonString = response.toString - val sqlResponse = - ElasticResponse( - "", - jsonString, - fieldAliases, - aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) - ) - - parseResponse(sqlResponse) match { + parseResponse( + jsonString, + fieldAliases, + aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + ) match { case Success(rows) => logger.debug(s"Parsed ${rows.size} rows from response") rows @@ -1370,9 +1366,7 @@ trait RestHighLevelClientScrollApi extends ScrollApi with RestHighLevelClientHel fieldAliases: Map[String, String] ): Seq[Map[String, Any]] = { val jsonString = response.toString - val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) - - parseResponse(sqlResponse) match { + parseResponse(jsonString, fieldAliases, Map.empty) match { case Success(rows) => rows case Failure(ex) => logger.error(s"Failed to parse search after response: ${ex.getMessage}", ex) diff --git a/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala b/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala index 30f5564e..63356f2e 100644 --- 
a/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala +++ b/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala @@ -1516,15 +1516,12 @@ trait RestHighLevelClientScrollApi extends ScrollApi with RestHighLevelClientHel aggregations: Map[String, SQLAggregation] ): Seq[Map[String, Any]] = { val jsonString = response.toString - val sqlResponse = - ElasticResponse( - "", - jsonString, - fieldAliases, - aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) - ) - parseResponse(sqlResponse) match { + parseResponse( + jsonString, + fieldAliases, + aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + ) match { case Success(rows) => logger.debug(s"Parsed ${rows.size} rows from response") rows @@ -1541,9 +1538,8 @@ trait RestHighLevelClientScrollApi extends ScrollApi with RestHighLevelClientHel fieldAliases: Map[String, String] ): Seq[Map[String, Any]] = { val jsonString = response.toString - val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) - parseResponse(sqlResponse) match { + parseResponse(jsonString, fieldAliases, Map.empty) match { case Success(rows) => rows case Failure(ex) => logger.error(s"Failed to parse search after response: ${ex.getMessage}", ex) diff --git a/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala index 612a76d1..ec367082 100644 --- a/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala +++ b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala @@ -1453,15 +1453,12 @@ trait JavaClientScrollApi extends ScrollApi with JavaClientHelpers { case Left(l) => convertToJson(l) case Right(r) => convertToJson(r) } - val sqlResponse = - ElasticResponse( - "", - jsonString, - fieldAliases, - aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) - ) - 
parseResponse(sqlResponse) match { + parseResponse( + jsonString, + fieldAliases, + aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) + ) match { case Success(rows) => logger.debug(s"Parsed ${rows.size} rows from response (hits + aggregations)") rows @@ -1478,9 +1475,8 @@ trait JavaClientScrollApi extends ScrollApi with JavaClientHelpers { fieldAliases: Map[String, String] ): Seq[Map[String, Any]] = { val jsonString = convertToJson(response) - val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) - parseResponse(sqlResponse) match { + parseResponse(jsonString, fieldAliases, Map.empty) match { case Success(rows) => logger.debug(s"Parsed ${rows.size} hits from response") rows diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala index f6c3a4e3..7cd2634c 100644 --- a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala +++ b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala @@ -48,6 +48,7 @@ import co.elastic.clients.elasticsearch.core.reindex.{Destination, Source => ESS import co.elastic.clients.elasticsearch.core.search.{PointInTimeReference, SearchRequestBody} import co.elastic.clients.elasticsearch.indices.update_aliases.{Action, AddAction, RemoveAction} import co.elastic.clients.elasticsearch.indices.{ExistsRequest => IndexExistsRequest, _} +import co.elastic.clients.elasticsearch.sql.QueryRequest import com.google.gson.JsonParser import _root_.java.io.{IOException, StringReader} @@ -750,6 +751,11 @@ trait JavaClientGetApi extends GetApi with JavaClientHelpers { trait JavaClientSearchApi extends SearchApi with JavaClientHelpers { _: JavaClientCompanion with SerializationApi => + val response = apply().sql().query(new QueryRequest.Builder().query("SELECT 1").build()) + val row = response.rows().get(0) + val data = row.get(0) + data.toJson + override 
implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = implicitly[ElasticSearchRequest](sqlSearch).query @@ -1446,10 +1452,12 @@ trait JavaClientScrollApi extends ScrollApi with JavaClientHelpers { case Left(l) => convertToJson(l) case Right(r) => convertToJson(r) } - val sqlResponse = - ElasticResponse("", jsonString, fieldAliases, aggregations.map(kv => kv._1 -> kv._2)) - parseResponse(sqlResponse) match { + parseResponse( + jsonString, + fieldAliases, + aggregations.map(kv => kv._1 -> kv._2) + ) match { case Success(rows) => logger.debug(s"Parsed ${rows.size} rows from response (hits + aggregations)") rows @@ -1466,9 +1474,8 @@ trait JavaClientScrollApi extends ScrollApi with JavaClientHelpers { fieldAliases: Map[String, String] ): Seq[Map[String, Any]] = { val jsonString = convertToJson(response) - val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) - parseResponse(sqlResponse) match { + parseResponse(jsonString, fieldAliases, Map.empty) match { case Success(rows) => logger.debug(s"Parsed ${rows.size} hits from response") rows diff --git a/macros/src/main/scala/app/softnetwork/elastic/sql/macros/SQLQueryValidator.scala b/macros/src/main/scala/app/softnetwork/elastic/sql/macros/SQLQueryValidator.scala index 3c3cd97e..35a4cd3d 100644 --- a/macros/src/main/scala/app/softnetwork/elastic/sql/macros/SQLQueryValidator.scala +++ b/macros/src/main/scala/app/softnetwork/elastic/sql/macros/SQLQueryValidator.scala @@ -17,6 +17,7 @@ package app.softnetwork.elastic.sql.macros import app.softnetwork.elastic.sql.`type`.{SQLType, SQLTypes} +import app.softnetwork.elastic.sql.function.aggregate.COUNT import app.softnetwork.elastic.sql.parser.Parser import app.softnetwork.elastic.sql.query.SQLSearchRequest @@ -199,7 +200,12 @@ trait SQLQueryValidator { // Check if any field is a wildcard (*) val hasWildcard = parsedQuery.select.fields.exists { field => - field.identifier.name == "*" + field.identifier.name == "*" && 
(field.aggregateFunction match { + case Some(COUNT) => + false + case _ => + true + }) } if (hasWildcard) { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala index 2a74dbf4..10fc24b4 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala @@ -149,6 +149,8 @@ case class SQLSearchRequest( ) .toMap .values + .groupBy(_.identifier.aliasOrName) + .map(_._2.head) .toSeq lazy val buckets: Seq[Bucket] = groupBy.map(_.buckets).getOrElse(Seq.empty) ++ windowBuckets From 96d8c4e15cef048df67b6636eb7a46e499d1565a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Sat, 22 Nov 2025 10:28:42 +0100 Subject: [PATCH 06/40] add full support for window functions --- .../elastic/client/ScrollApi.scala | 82 +- .../elastic/client/SearchApi.scala | 333 ++++- .../elastic/sql/SQLQuerySpec.scala | 10 +- .../client/JestClientWindowFunctionSpec.scala | 3 + ...estHighLevelClientWindowFunctionSpec.scala | 3 + ...estHighLevelClientWindowFunctionSpec.scala | 3 + .../client/JavaClientWindowFunctionSpec.scala | 3 + .../client/JavaClientWindowFunctionSpec.scala | 3 + .../elastic/client/EmployeeData.scala | 240 ++++ .../elastic/client/WindowFunctionSpec.scala | 1274 +++++++++++++++++ .../elastic/model/window/package.scala | 86 ++ 11 files changed, 2036 insertions(+), 4 deletions(-) create mode 100644 es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientWindowFunctionSpec.scala create mode 100644 es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientWindowFunctionSpec.scala create mode 100644 es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientWindowFunctionSpec.scala create mode 100644 es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientWindowFunctionSpec.scala create mode 100644 
es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientWindowFunctionSpec.scala create mode 100644 testkit/src/main/scala/app/softnetwork/elastic/client/EmployeeData.scala create mode 100644 testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala create mode 100644 testkit/src/main/scala/app/softnetwork/elastic/model/window/package.scala diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala index 8db08707..5ea8bee5 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala @@ -19,6 +19,7 @@ package app.softnetwork.elastic.client import akka.NotUsed import akka.actor.ActorSystem import akka.stream.scaladsl.{Sink, Source} +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticResult, ElasticSuccess} import app.softnetwork.elastic.client.scroll.{ ScrollConfig, ScrollMetrics, @@ -28,11 +29,11 @@ import app.softnetwork.elastic.client.scroll.{ UseSearchAfter } import app.softnetwork.elastic.sql.macros.SQLQueryMacros -import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLQuery} +import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLQuery, SQLSearchRequest} import org.json4s.{Formats, JNothing} import org.json4s.jackson.JsonMethods.parse -import scala.concurrent.{ExecutionContext, Promise} +import scala.concurrent.{ExecutionContext, Future, Promise} import scala.language.experimental.macros import scala.util.{Failure, Success} @@ -121,6 +122,14 @@ trait ScrollApi extends ElasticClientHelpers { )(implicit system: ActorSystem): Source[(Map[String, Any], ScrollMetrics), NotUsed] = { sql.request match { case Some(Left(single)) => + if ( + single.windowFunctions.nonEmpty && (single.fields.nonEmpty || single.windowFunctions + .flatMap(_.fields) + .distinct + .size > 1) + ) + return scrollWithWindowEnrichment(sql, single, config) + 
val sqlRequest = single.copy(score = sql.score) val elasticQuery = ElasticQuery(sqlRequest, collection.immutable.Seq(sqlRequest.sources: _*)) @@ -365,4 +374,73 @@ trait ScrollApi extends ElasticClientHelpers { } } + // ======================================================================== + // WINDOW FUNCTION SEARCH + // ======================================================================== + + /** Scroll with window function enrichment + */ + private def scrollWithWindowEnrichment( + sql: SQLQuery, + request: SQLSearchRequest, + config: ScrollConfig + )(implicit system: ActorSystem): Source[(Map[String, Any], ScrollMetrics), NotUsed] = { + + implicit val ec: ExecutionContext = system.dispatcher + + logger.info(s"🪟 Scrolling with ${request.windowFunctions.size} window functions") + + // Execute window aggregations first + val windowCacheFuture: Future[ElasticResult[WindowCache]] = + Future(executeWindowAggregations(request)) + + // Create base query without window functions + val baseQuery = createBaseQuery(sql, request) + + // Stream and enrich + Source + .futureSource( + windowCacheFuture.map { + case ElasticSuccess(cache) => + scrollWithMetrics( + ElasticQuery( + baseQuery, + collection.immutable.Seq(baseQuery.sources: _*) + ), + baseQuery.fieldAliases, + baseQuery.sqlAggregations, + config, + baseQuery.sorts.nonEmpty + ) + .map { case (doc, metrics) => + val enrichedDoc = enrichDocumentWithWindowValues(doc, cache, request) + (enrichedDoc, metrics) + } + + case ElasticFailure(error) => + logger.error(s"❌ Failed to compute window functions: ${error.message}") + if (config.failOnWindowError.getOrElse(false)) { + // Strict mode: propagate the error + Source.failed( + new RuntimeException(s"Window function computation failed: ${error.message}") + ) + } else { + // Fallback: return base results without enrichment + logger.warn("⚠️ Falling back to base results without window enrichment") + scrollWithMetrics( + ElasticQuery( + baseQuery, + 
collection.immutable.Seq(baseQuery.sources: _*) + ), + baseQuery.fieldAliases, + baseQuery.sqlAggregations, + config, + baseQuery.sorts.nonEmpty + ) + } + } + ) + .mapMaterializedValue(_ => NotUsed) + } + } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala index a75c75ec..32f89ed3 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -68,7 +68,15 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { collection.immutable.Seq(single.sources: _*), sql = Some(sql.query) ) - singleSearch(elasticQuery, single.fieldAliases, single.sqlAggregations) + if ( + single.windowFunctions.nonEmpty && (single.fields.nonEmpty || single.windowFunctions + .flatMap(_.fields) + .distinct + .size > 1) + ) + searchWithWindowEnrichment(sql, single) + else + singleSearch(elasticQuery, single.fieldAliases, single.sqlAggregations) case Some(Right(multiple)) => val elasticQueries = ElasticQueries( @@ -1083,4 +1091,327 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { ) } + // ======================================================================== + // WINDOW FUNCTION SEARCH + // ======================================================================== + + /** Search with window function enrichment + * + * Strategy: + * 1. Execute aggregation query to compute window values 2. Execute main query (without window + * functions) 3. 
Enrich results with window values + */ + private def searchWithWindowEnrichment( + sql: SQLQuery, + request: SQLSearchRequest + ): ElasticResult[ElasticResponse] = { + + logger.info(s"🪟 Detected ${request.windowFunctions.size} window functions") + + for { + // Step 1: Execute window aggregations + windowCache <- executeWindowAggregations(request) + + // Step 2: Execute base query (without window functions) + baseResponse <- executeBaseQuery(sql, request) + + // Step 3: Enrich results + enrichedResponse <- enrichResponseWithWindowValues(baseResponse, windowCache, request) + + } yield enrichedResponse + } + + // ======================================================================== + // WINDOW AGGREGATION EXECUTION + // ======================================================================== + + /** Execute aggregation queries for all window functions Returns a cache of partition key -> + * window values + */ + protected def executeWindowAggregations( + request: SQLSearchRequest + ): ElasticResult[WindowCache] = { + + // Build aggregation request + val aggRequest = buildWindowAggregationRequest(request) + val sql = aggRequest.sql + + logger.info( + s"🔍 Executing window aggregation query:\n$sql" + ) + + // Execute aggregation using existing search infrastructure + val elasticQuery = ElasticQuery( + aggRequest, + collection.immutable.Seq(aggRequest.sources: _*), + sql = Some(sql) + ) + + for { + // Use singleSearch to execute aggregation + aggResponse <- singleSearch( + elasticQuery, + aggRequest.fieldAliases, + aggRequest.sqlAggregations + ) + + // Parse aggregation results into cache + cache <- parseWindowAggregationsToCache(aggResponse, request) + + } yield cache + } + + /** Build aggregation request for window functions + */ + private def buildWindowAggregationRequest( + request: SQLSearchRequest + ): SQLSearchRequest = { + + // Create modified request with: + // - Only window buckets in GROUP BY + // - Only window aggregations in SELECT + // - No LIMIT (need all 
partitions) + // - Same WHERE clause (to match base query filtering) + request + .copy( + select = request.select.copy(fields = request.windowFields), + groupBy = request.groupBy.map(_.copy(buckets = request.windowBuckets)), + orderBy = None, // Not needed for aggregations + limit = None // Need all buckets + ) + .update() + } + + /** Parse aggregation response into window cache Uses your existing + * ElasticConversion.parseResponse + */ + private def parseWindowAggregationsToCache( + response: ElasticResponse, + request: SQLSearchRequest + ): ElasticResult[WindowCache] = { + + logger.info( + s"🔍 Parsing window aggregations to cache for query \n${response.sql.getOrElse(response.query)}" + ) + + val aggRows = response.results + + logger.info(s"✅ Parsed ${aggRows.size} aggregation buckets") + + // Build cache: partition key -> window values + val cache = aggRows.map { row => + val partitionKey = extractPartitionKey(row, request) + val windowValues = extractWindowValues(row, response.aggregations) + + partitionKey -> windowValues + }.toMap + + ElasticResult.success(WindowCache(cache)) + } + + // ======================================================================== + // BASE QUERY EXECUTION + // ======================================================================== + + /** Execute base query without window functions + */ + private def executeBaseQuery( + sql: SQLQuery, + request: SQLSearchRequest + ): ElasticResult[ElasticResponse] = { + + val baseQuery = createBaseQuery(sql, request) + + logger.info(s"🔍 Executing base query without window functions ${baseQuery.sql}") + + singleSearch( + ElasticQuery( + baseQuery, + collection.immutable.Seq(baseQuery.sources: _*), + sql = Some(baseQuery.sql) + ), + baseQuery.fieldAliases, + baseQuery.sqlAggregations + ) + } + + /** Create base query by removing window functions from SELECT + */ + protected def createBaseQuery( + sql: SQLQuery, + request: SQLSearchRequest + ): SQLSearchRequest = { + + // Remove window function 
fields from SELECT + val baseFields = request.select.fields.filterNot(_.windows.nonEmpty) + + // Create modified request + val baseRequest = request + .copy( + select = request.select.copy(fields = baseFields) + ) + .copy(score = sql.score) + .update() + + baseRequest + } + + /** Extract partition key from aggregation row + */ + private def extractPartitionKey( + row: Map[String, Any], + request: SQLSearchRequest + ): PartitionKey = { + + // Get all partition fields from window functions + val partitionFields = request.windowFunctions + .flatMap(_.partitionBy) + .map(_.aliasOrName) + .distinct + + if (partitionFields.isEmpty) { + return PartitionKey(Map("__global__" -> true)) + } + + val keyValues = partitionFields.flatMap { field => + row.get(field).map(field -> _) + }.toMap + + PartitionKey(keyValues) + } + + /** Extract window function values from aggregation row + */ + private def extractWindowValues( + row: Map[String, Any], + aggregations: Map[String, ClientAggregation] + ): WindowValues = { + + val values = aggregations + .filter(_._2.window) + .map { wf => + val fieldName = wf._1 + + val aggType = wf._2.aggType + + val sourceField = wf._2.sourceField + + // Get value from row (already processed by ElasticConversion) + val value = row.get(fieldName).orElse { + logger.warn(s"⚠️ Window function '$fieldName' not found in aggregation result") + None + } + + val validatedValue = + value match { + case Some(m: Map[String, Any]) => + m.get(sourceField) match { + case Some(v) => + aggType match { + case AggregationType.ArrayAgg => + v match { + case l: List[_] => + Some(l) + case other => + logger.warn( + s"⚠️ Expected List for ARRAY_AGG '$fieldName', got ${other.getClass.getSimpleName}" + ) + Some(List(other)) // Wrap into a List + } + case _ => Some(v) + } + case None => + None + } + case other => + other + } + + fieldName -> validatedValue + } + .collect { case (name, Some(value)) => + name -> value + } + + WindowValues(values) + } + + // 
======================================================================== + // RESULT ENRICHMENT + // ======================================================================== + + /** Enrich response with window values + */ + private def enrichResponseWithWindowValues( + response: ElasticResponse, + cache: WindowCache, + request: SQLSearchRequest + ): ElasticResult[ElasticResponse] = { + + val baseRows = response.results + // Enrich each row + val enrichedRows = baseRows.map { row => + enrichDocumentWithWindowValues(row, cache, request) + } + + ElasticResult.success(response.copy(results = enrichedRows)) + } + + /** Enrich a single document with window values + */ + protected def enrichDocumentWithWindowValues( + doc: Map[String, Any], + cache: WindowCache, + request: SQLSearchRequest + ): Map[String, Any] = { + + if (request.windowFunctions.isEmpty) { + return doc + } + + // Build partition key from document + val partitionKey = extractPartitionKey(doc, request) + + // Lookup window values + cache.get(partitionKey) match { + case Some(windowValues) => + // Merge document with window values + doc ++ windowValues.values + + case None => + logger.warn(s"⚠️ No window values found for partition: ${partitionKey.values}") + + // Add null values for missing window functions + val nullValues = request.windowFunctions.map { wf => + wf.identifier.aliasOrName -> null + }.toMap + + doc ++ nullValues + } + } + + // ======================================================================== + // HELPER CASE CLASSES + // ======================================================================== + + /** Partition key for window function cache + */ + protected case class PartitionKey(values: Map[String, Any]) { + override def hashCode(): Int = values.hashCode() + override def equals(obj: Any): Boolean = obj match { + case other: PartitionKey => values == other.values + case _ => false + } + } + + /** Window function values for a partition + */ + protected case class WindowValues(values: 
Map[String, Any]) + + /** Cache of partition key -> window values + */ + protected case class WindowCache(cache: Map[PartitionKey, WindowValues]) { + def get(key: PartitionKey): Option[WindowValues] = cache.get(key) + def size: Int = cache.size + } } diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 29ffaa51..5ada90b9 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -3731,7 +3731,15 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "test" in { val query = - """SELECT name FROM users WHERE status = 'active' AND age > 25 + """SELECT + | name, + | department, + | salary, + | FIRST_VALUE(salary) OVER ( + | PARTITION BY department + | ORDER BY hire_date + | ) as firstSalaryInDept + | FROM employees |""".stripMargin val select: ElasticSearchRequest = SQLQuery(query) println(select.query) diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientWindowFunctionSpec.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientWindowFunctionSpec.scala new file mode 100644 index 00000000..886b7fe0 --- /dev/null +++ b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientWindowFunctionSpec.scala @@ -0,0 +1,3 @@ +package app.softnetwork.elastic.client + +class JestClientWindowFunctionSpec extends WindowFunctionSpec diff --git a/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientWindowFunctionSpec.scala b/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientWindowFunctionSpec.scala new file mode 100644 index 00000000..4df1d284 --- /dev/null +++ b/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientWindowFunctionSpec.scala @@ -0,0 +1,3 @@ +package app.softnetwork.elastic.client + +class RestHighLevelClientWindowFunctionSpec 
extends WindowFunctionSpec diff --git a/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientWindowFunctionSpec.scala b/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientWindowFunctionSpec.scala new file mode 100644 index 00000000..4df1d284 --- /dev/null +++ b/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientWindowFunctionSpec.scala @@ -0,0 +1,3 @@ +package app.softnetwork.elastic.client + +class RestHighLevelClientWindowFunctionSpec extends WindowFunctionSpec diff --git a/es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientWindowFunctionSpec.scala b/es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientWindowFunctionSpec.scala new file mode 100644 index 00000000..ad5e6daa --- /dev/null +++ b/es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientWindowFunctionSpec.scala @@ -0,0 +1,3 @@ +package app.softnetwork.elastic.client + +class JavaClientWindowFunctionSpec extends WindowFunctionSpec diff --git a/es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientWindowFunctionSpec.scala b/es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientWindowFunctionSpec.scala new file mode 100644 index 00000000..ad5e6daa --- /dev/null +++ b/es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientWindowFunctionSpec.scala @@ -0,0 +1,3 @@ +package app.softnetwork.elastic.client + +class JavaClientWindowFunctionSpec extends WindowFunctionSpec diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/EmployeeData.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/EmployeeData.scala new file mode 100644 index 00000000..a98d3cba --- /dev/null +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/EmployeeData.scala @@ -0,0 +1,240 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import akka.NotUsed +import akka.actor.ActorSystem +import akka.stream.scaladsl.Source +import app.softnetwork.elastic.client.bulk._ +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticSuccess} +import app.softnetwork.elastic.model.window.Employee +import app.softnetwork.persistence.generateUUID +import org.json4s.Formats +import org.scalatest.Suite + +import scala.language.implicitConversions + +trait EmployeeData { _: Suite => + + implicit val system: ActorSystem = ActorSystem(generateUUID()) + + implicit def formats: Formats + + def client: ElasticClientApi + + /** Load employees + */ + def loadEmployees(): Unit = { + + implicit val bulkOptions: BulkOptions = BulkOptions( + defaultIndex = "emp", + logEvery = 5 + ) + + val employees = getEmployees.zipWithIndex.map { case (emp, idx) => + serialization.write(emp.copy(id = s"emp_${idx + 1}")) + }.toList + + implicit def listToSource[T](list: List[T]): Source[T, NotUsed] = + Source.fromIterator(() => list.iterator) + + client.bulk[String](employees, identity, idKey = Some("id")) match { + case ElasticSuccess(response) => + println(s"✅ Bulk indexing completed:") + println(s" - Total items: ${response.metrics.totalDocuments}") + println(s" - Successful: ${response.successCount}") + println(s" - Failed: ${response.failedCount}") + println(s" - Took: ${response.metrics.durationMs}ms") + + // Afficher les erreurs éventuelles + val failures = response.failedDocuments + if (failures.nonEmpty) { + println(s" ⚠️ ${failures.size} documents 
failed:") + failures.foreach { item => + println(s" - Document ${item.id}: ${item.error.message}") + } + } + + case ElasticFailure(error) => + error.cause.foreach(t => t.printStackTrace()) + fail(s"❌ Bulk indexing failed: ${error.message}") + } + } + + // ======================================================================== + // HELPER METHODS + // ======================================================================== + + private def getEmployees: Seq[Employee] = Seq( + Employee( + "Alice Johnson", + "Engineering", + "New York", + 95000, + "2019-03-15", + "Senior", + List("Java", "Python", "Scala") + ), + Employee( + "Bob Smith", + "Engineering", + "New York", + 120000, + "2018-01-10", + "Lead", + List("Scala", "Spark", "Kafka") + ), + Employee( + "Charlie Brown", + "Engineering", + "San Francisco", + 85000, + "2020-06-20", + "Mid", + List("Python", "Django") + ), + Employee( + "Diana Prince", + "Engineering", + "San Francisco", + 110000, + "2017-09-05", + "Senior", + List("Go", "Kubernetes", "Docker") + ), + Employee( + "Eve Davis", + "Engineering", + "New York", + 75000, + "2021-02-12", + "Junior", + List("JavaScript", "React") + ), + Employee( + "Frank Miller", + "Sales", + "New York", + 80000, + "2019-07-22", + "Mid", + List("Salesforce", "CRM") + ), + Employee( + "Grace Lee", + "Sales", + "Chicago", + 90000, + "2018-11-30", + "Senior", + List("Negotiation", "B2B") + ), + Employee( + "Henry Wilson", + "Sales", + "Chicago", + 70000, + "2020-04-18", + "Junior", + List("Cold Calling") + ), + Employee( + "Iris Chen", + "Sales", + "New York", + 95000, + "2017-03-08", + "Lead", + List("Strategy", "Analytics") + ), + Employee( + "Jack Taylor", + "Marketing", + "San Francisco", + 78000, + "2019-10-01", + "Mid", + List("SEO", "Content") + ), + Employee( + "Karen White", + "Marketing", + "San Francisco", + 88000, + "2018-05-15", + "Senior", + List("Brand", "Digital") + ), + Employee( + "Leo Martinez", + "Marketing", + "Chicago", + 65000, + "2021-01-20", + "Junior", 
+ List("Social Media") + ), + Employee( + "Maria Garcia", + "HR", + "New York", + 72000, + "2019-08-12", + "Mid", + List("Recruiting", "Onboarding") + ), + Employee("Nathan King", "HR", "Chicago", 68000, "2020-11-05", "Junior", List("Payroll")), + Employee( + "Olivia Scott", + "HR", + "New York", + 85000, + "2017-12-01", + "Senior", + List("Policy", "Compliance") + ), + Employee( + "Paul Anderson", + "Engineering", + "Remote", + 105000, + "2016-04-10", + "Senior", + List("Rust", "Systems") + ), + Employee("Quinn Roberts", "Sales", "Remote", 92000, "2019-02-28", "Senior", List("Enterprise")), + Employee( + "Rachel Green", + "Marketing", + "Remote", + 81000, + "2020-09-10", + "Mid", + List("Analytics", "PPC") + ), + Employee( + "Sam Turner", + "Engineering", + "San Francisco", + 130000, + "2015-06-01", + "Principal", + List("Architecture", "Leadership") + ), + Employee("Tina Brooks", "Sales", "Chicago", 75000, "2021-03-15", "Junior", List("B2C")) + ) +} diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala new file mode 100644 index 00000000..fa27508b --- /dev/null +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala @@ -0,0 +1,1274 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client + +import akka.stream.scaladsl.Sink +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticSuccess} +import app.softnetwork.elastic.client.scroll.ScrollConfig +import app.softnetwork.elastic.client.spi.ElasticClientFactory +import app.softnetwork.elastic.model.window._ +import app.softnetwork.elastic.scalatest.ElasticDockerTestKit +import org.scalatest.flatspec.AnyFlatSpecLike +import org.scalatest.matchers.should.Matchers +import org.slf4j.{Logger, LoggerFactory} + +import java.time.LocalDate +import scala.concurrent.Await +import scala.concurrent.duration._ + +trait WindowFunctionSpec + extends AnyFlatSpecLike + with ElasticDockerTestKit + with Matchers + with EmployeeData { + + lazy val log: Logger = LoggerFactory getLogger getClass.getName + + override def client: ElasticClientApi = ElasticClientFactory.create(elasticConfig) + + override def beforeAll(): Unit = { + super.beforeAll() + + val mapping = + """{ + | "properties": { + | "name": { + | "type": "text", + | "fields": { + | "keyword": { + | "type": "keyword" + | } + | } + | }, + | "department": { + | "type": "keyword" + | }, + | "location": { + | "type": "keyword" + | }, + | "salary": { + | "type": "integer" + | }, + | "hire_date": { + | "type": "date", + | "format": "yyyy-MM-dd" + | }, + | "level": { + | "type": "keyword" + | }, + | "skills": { + | "type": "keyword" + | } + | } + |}""".stripMargin + + client.createIndex("emp").get shouldBe true + + client.setMapping("emp", mapping).get shouldBe true + + loadEmployees() + } + + override def afterAll(): Unit = { + client.deleteIndex("emp") + // system.terminate() + super.afterAll() + } + + "Index mapping" should "have correct field types" in { + client.getMapping("emp") match { + case ElasticSuccess(mapping) => + log.info(s"📋 Mapping: $mapping") + + mapping should include("hire_date") + mapping should include("\"type\":\"date\"") + mapping should include("\"format\":\"yyyy-MM-dd\"") + + case 
ElasticFailure(error) => fail(s"Failed to get mapping: ${error.message}") + } + } + + "Sample document" should "have hire_date as string" in { + val results = client.searchAs[Employee](""" + SELECT + name, + department, + location, + salary, + hire_date, + level, + skills, + id + FROM emp + WHERE name.keyword = 'Sam Turner' + """) + + results match { + case ElasticSuccess(employees) => + employees should have size 1 + val sam = employees.head + + sam.name shouldBe "Sam Turner" + sam.hire_date shouldBe "2015-06-01" + + log.info(s"✅ Sam Turner hire_date: ${sam.hire_date}") + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + // ======================================================================== + // BASIC WINDOW FUNCTION TESTS + // ======================================================================== + + "FIRST_VALUE window function" should "return first salary per department" in { + val results = client.searchAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + location, + level, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + ORDER BY department, hire_date + """ + ) + + results match { + case ElasticSuccess(employees) => + employees should not be empty + + // Engineering: first hire = Sam Turner (2015-06-01, $130k) + val engineering = employees.filter(_.department == "Engineering") + engineering.foreach { emp => + emp.first_salary shouldBe Some(130000) + } + + // Sales: first hire = Iris Chen (2017-03-08, $95k) + val sales = employees.filter(_.department == "Sales") + sales.foreach { emp => + emp.first_salary shouldBe Some(95000) + } + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + "LAST_VALUE window function" should "return last salary per department" in { + val results = client.searchAs[EmployeeWithWindow](""" + SELECT + department, + name, + salary, + hire_date, + location, + level, + 
LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_salary + FROM emp + ORDER BY department, hire_date + """) + + results match { + case ElasticSuccess(employees) => + employees should not be empty + + // Engineering: last hire = Eve Davis (2021-02-12, $75k) + val engineering = employees.filter(_.department == "Engineering") + engineering.foreach { emp => + emp.last_salary shouldBe Some(75000) + } + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + /*"ROW_NUMBER window function" should "assign sequential numbers per partition" in { + val results = client.searchAs[EmployeeWithWindow](""" + SELECT + department, + name, + salary, + hire_date, + ROW_NUMBER() OVER ( + PARTITION BY department + ORDER BY salary DESC + ) AS row_number + FROM emp + ORDER BY department, row_number + """) + + results match { + case ElasticSuccess(employees) => + employees.groupBy(_.department).foreach { case (dept, emps) => + val rowNumbers = emps.flatMap(_.row_number).sorted + rowNumbers shouldBe (1 to emps.size).toList + + info(s"$dept: ${emps.size} employees numbered 1 to ${emps.size}") + } + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + "RANK window function" should "handle ties correctly" in { + val results = client.searchAs[EmployeeWithWindow](""" + SELECT + department, + name, + salary, + hire_date, + RANK() OVER ( + PARTITION BY department + ORDER BY salary DESC + ) AS rank + FROM emp + ORDER BY department, rank + """) + + results match { + case ElasticSuccess(employees) => + employees.groupBy(_.department).foreach { case (dept, emps) => + val ranks = emps.flatMap(_.rank) + ranks.head shouldBe 1 // Top earner always rank 1 + + info(s"$dept top earner: ${emps.head.name} (${emps.head.salary})") + } + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + }*/ + + // ======================================================================== + // TESTS 
WITH FILTERS + // ======================================================================== + + "Window function with WHERE clause" should "apply filters before computation" in { + val results = client.searchAs[EmployeeWithWindow](""" + SELECT + department, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + WHERE salary > 80000 + ORDER BY department, hire_date + """) + + results match { + case ElasticSuccess(employees) => + employees.foreach { emp => + emp.salary should be > 80000 + } + + // Engineering avec filtre: first = Paul Anderson (2016-04-10, $105k) + val engineering = employees.filter(_.department == "Engineering") + engineering should not be empty + engineering.foreach { emp => + emp.first_salary shouldBe Some(130000) + } + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + "Window function with department filter" should "compute only for filtered data" in { + val results = client.searchAs[EmployeeWithWindow](""" + SELECT + department, + name, + salary, + hire_date, + location, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + WHERE department IN ('Engineering', 'Sales') + ORDER BY department, hire_date + """) + + results match { + case ElasticSuccess(employees) => + employees.foreach { emp => + emp.department should (be("Engineering") or be("Sales")) + emp.first_salary shouldBe defined + } + + val departments = employees.map(_.department).distinct + departments should contain only ("Engineering", "Sales") + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + // ======================================================================== + // TESTS WITH GLOBAL WINDOW + // ======================================================================== + + "Global window function" should "use same value for all rows" in { + val results = 
client.searchAs[EmployeeWithGlobalWindow](""" + SELECT + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER (ORDER BY hire_date ASC) AS first_ever_salary, + LAST_VALUE(salary) OVER ( + ORDER BY hire_date ASC + ) AS last_ever_salary + FROM emp + ORDER BY hire_date + LIMIT 20 + """) + + results match { + case ElasticSuccess(employees) => + employees should have size 20 + + // Premier embauché: Sam Turner (2015-06-01, $130k) + employees.foreach { emp => + emp.first_ever_salary shouldBe Some(130000) + } + + // Dernier embauché: Tina Brooks (2021-03-15, $75k) + employees.foreach { emp => + emp.last_ever_salary shouldBe Some(75000) + } + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + // ======================================================================== + // TESTS WITH MULTIPLE PARTITIONS + // ======================================================================== + + "Multiple partition keys" should "compute independently" in { + val results = client.searchAs[EmployeeMultiPartition](""" + SELECT + department, + location, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS first_in_dept_loc + FROM emp + WHERE department IN ('Engineering', 'Sales') + ORDER BY department, location, hire_date + """) + + results match { + case ElasticSuccess(employees) => + employees + .groupBy(e => (e.department, e.location)) + .foreach { case ((dept, loc), emps) => + val firstValues = emps.flatMap(_.first_in_dept_loc).distinct + firstValues should have size 1 + + info(s"$dept @ $loc: first_salary = ${firstValues.head}") + } + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + // ======================================================================== + // TESTS WITH LIMIT + // ======================================================================== + + "Window function with LIMIT" should "return correct number of results" in { + 
val results = client.searchAs[EmployeeWithWindow](""" + SELECT + department, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + ORDER BY salary DESC + LIMIT 10 + """) + + results match { + case ElasticSuccess(employees) => + employees should have size 10 + + // Top salary: Sam Turner ($130k) + employees.head.name shouldBe "Sam Turner" + employees.head.salary shouldBe 130000 + + // Tous les salaires >= $80k + employees.foreach { emp => + emp.salary should be >= 80000 + } + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + // ======================================================================== + // TESTS WITH AGGREGATIONS + // ======================================================================== + + "Window function with aggregations" should "combine GROUP BY and OVER" in { + val results = client.searchAs[DepartmentStats](""" + SELECT + department, + AVG(salary) AS avg_salary, + MAX(salary) AS max_salary, + MIN(salary) AS min_salary, + COUNT(*) AS employee_count + FROM emp + GROUP BY department + ORDER BY avg_salary DESC + """) + + results match { + case ElasticSuccess(departments) => + departments should not be empty + + val engineering = departments.find(_.department == "Engineering") + engineering shouldBe defined + engineering.get.max_salary shouldBe 130000 // Sam Turner + engineering.get.min_salary shouldBe 75000 // Eve Davis + engineering.get.employee_count shouldBe 6 + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + // ======================================================================== + // SCROLL TESTS + // ======================================================================== + + "Scroll with FIRST_VALUE" should "stream all employees with window enrichment" in { + val config = ScrollConfig(scrollSize = 5) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """SELECT 
+ department, + name, + salary, + hire_date, + location, + level, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + ORDER BY department, hire_date + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results should have size 20 + + // Vérifier la cohérence par département + results.map(_._1).groupBy(_.department).foreach { case (dept, emps) => + val firstSalaries = emps.flatMap(_.first_salary).distinct + firstSalaries should have size 1 + + info(s"$dept: first_salary = ${firstSalaries.head}") + } + } + + "Scroll with multiple window functions" should "enrich with multiple columns" in { + val config = ScrollConfig(scrollSize = 3, logEvery = 5) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + location, + level, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_salary + FROM emp + ORDER BY department, hire_date + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results should have size 20 + + results.foreach { case (emp, _) => + emp.first_salary shouldBe defined + emp.last_salary shouldBe defined + } + } + + // ======================================================================== + // TESTS DE PERFORMANCE + // ======================================================================== + + "Window functions performance" should "maintain good throughput" in { + val config = ScrollConfig(scrollSize = 5, logEvery = 10) + + val startTime = System.currentTimeMillis() + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary, + 
LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_salary + FROM emp + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + val duration = System.currentTimeMillis() - startTime + + results should have size 20 + duration should be < 5000L + + info(s"Scrolled ${results.size} documents with 3 window functions in ${duration}ms") + } + + // ======================================================================== + // TEST WITH MINIMAL CASE CLASS + // ======================================================================== + + "Minimal case class" should "work with partial SELECT" in { + val results = client.searchAs[EmployeeMinimal](""" + SELECT + name, + department, + salary + FROM emp + WHERE salary > 100000 + ORDER BY salary DESC + """) + + results match { + case ElasticSuccess(employees) => + employees should not be empty + employees.foreach { emp => + emp.salary should be > 100000 + } + + // Top 3: Sam Turner, Bob Smith, Diana Prince + employees.take(3).map(_.name) should contain allOf ( + "Sam Turner", "Bob Smith", "Diana Prince" + ) + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + // ======================================================================== + // SCROLL: FIRST_VALUE + // ======================================================================== + + "Scroll with FIRST_VALUE" should "stream all employees with first salary per department" in { + val config = ScrollConfig(scrollSize = 5, logEvery = 10) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + location, + level, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + ORDER BY department, hire_date + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results should have size 20 + + // Vérifier la 
cohérence par département + val employees = results.map(_._1) + + val engineering = employees.filter(_.department == "Engineering") + engineering.foreach { emp => + emp.first_salary shouldBe Some(130000) // Sam Turner (2015-06-01) + } + + val sales = employees.filter(_.department == "Sales") + sales.foreach { emp => + emp.first_salary shouldBe Some(95000) // Iris Chen (2017-03-08) + } + + val marketing = employees.filter(_.department == "Marketing") + marketing.foreach { emp => + emp.first_salary shouldBe Some(88000) // Karen White (2018-05-15) + } + + info(s"✅ Scrolled ${results.size} employees with FIRST_VALUE") + } + + "Scroll with FIRST_VALUE and small batches" should "handle pagination correctly" in { + val config = ScrollConfig(scrollSize = 2, logEvery = 5) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + WHERE department = 'Engineering' + ORDER BY hire_date + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results should have size 7 // 7 engineers + + val employees = results.map(_._1) + employees.foreach { emp => + emp.department shouldBe "Engineering" + emp.first_salary shouldBe Some(130000) + } + + // Vérifier l'ordre chronologique + val hireDates = employees.map(_.hire_date) + hireDates shouldBe hireDates.sorted + + info(s"✅ Scrolled ${results.size} engineers in batches of 2") + } + + // ======================================================================== + // SCROLL: LAST_VALUE + // ======================================================================== + + "Scroll with LAST_VALUE" should "stream all employees with last salary per department" in { + val config = ScrollConfig(scrollSize = 4, logEvery = 8) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, 
+ hire_date, + location, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_salary + FROM emp + ORDER BY department, hire_date + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results should have size 20 + + val employees = results.map(_._1) + + val engineering = employees.filter(_.department == "Engineering") + engineering.foreach { emp => + emp.last_salary shouldBe Some(75000) // Eve Davis (2021-02-12) + } + + val sales = employees.filter(_.department == "Sales") + sales.foreach { emp => + emp.last_salary shouldBe Some(75000) // Tina Brooks (2021-03-15) + } + + info(s"✅ Scrolled ${results.size} employees with LAST_VALUE") + } + + "Scroll with LAST_VALUE and filter" should "apply WHERE before window computation" in { + val config = ScrollConfig(scrollSize = 3) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_salary + FROM emp + WHERE salary > 80000 + ORDER BY department, hire_date + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + val employees = results.map(_._1) + + employees.foreach { emp => + emp.salary should be > 80000 + } + + // Engineering avec filtre: last = Diana Prince (2017-09-05, $110k) + val engineering = employees.filter(_.department == "Engineering") + engineering.foreach { emp => + emp.last_salary shouldBe Some(85000) + } + + info(s"✅ Scrolled ${employees.size} high-salary employees with LAST_VALUE") + } + + // ======================================================================== + // SCROLL: ROW_NUMBER + // ======================================================================== + + /*"Scroll with ROW_NUMBER" should "assign sequential numbers per partition" in { + val config = ScrollConfig(scrollSize = 5) + + val futureResults = client + 
.scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + ROW_NUMBER() OVER ( + PARTITION BY department + ORDER BY salary DESC + ) AS row_number + FROM emp + ORDER BY department, row_number + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results should have size 20 + + val employees = results.map(_._1) + + employees.groupBy(_.department).foreach { case (dept, emps) => + val rowNumbers = emps.flatMap(_.row_number).sorted + rowNumbers shouldBe (1 to emps.size).toList + + // Top earner (row_number = 1) + val topEarner = emps.find(_.row_number.contains(1)).get + + dept match { + case "Engineering" => topEarner.name shouldBe "Sam Turner" + case "Sales" => topEarner.name shouldBe "Iris Chen" + case "Marketing" => topEarner.name shouldBe "Karen White" + case "HR" => topEarner.name shouldBe "Olivia Scott" + case _ => // OK + } + + info(s"$dept: ${emps.size} employees, top earner = ${topEarner.name}") + } + } + + "Scroll with ROW_NUMBER and LIMIT simulation" should "get top N per department" in { + val config = ScrollConfig(scrollSize = 10) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + ROW_NUMBER() OVER ( + PARTITION BY department + ORDER BY salary DESC + ) AS row_number + FROM emp + ORDER BY department, row_number + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + val employees = results.map(_._1) + + // Filtrer les top 2 par département + val top2PerDept = employees + .filter(_.row_number.exists(_ <= 2)) + .groupBy(_.department) + + top2PerDept.foreach { case (dept, emps) => + emps should have size 2 + info(s"$dept top 2: ${emps.map(e => s"${e.name} ($${e.salary})").mkString(", ")}") + } + } + + // ======================================================================== + // SCROLL: RANK + // 
======================================================================== + + "Scroll with RANK" should "handle ties correctly" in { + val config = ScrollConfig(scrollSize = 5) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + RANK() OVER ( + PARTITION BY department + ORDER BY salary DESC + ) AS rank + FROM emp + ORDER BY department, rank + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results should have size 20 + + val employees = results.map(_._1) + + employees.groupBy(_.department).foreach { case (dept, emps) => + val ranks = emps.flatMap(_.rank) + ranks.head shouldBe 1 // Top earner always rank 1 + + val topEarner = emps.head + info(s"$dept rank 1: ${topEarner.name} ($${topEarner.salary})") + } + }*/ + + // ======================================================================== + // SCROLL: GLOBAL WINDOW + // ======================================================================== + + "Scroll with global window" should "use same value for all rows" in { + val config = ScrollConfig(scrollSize = 7) + + val futureResults = client + .scrollAs[EmployeeWithGlobalWindow]( + """ + SELECT + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER (ORDER BY hire_date ASC) AS first_ever_salary, + LAST_VALUE(salary) OVER ( + ORDER BY hire_date ASC + ) AS last_ever_salary + FROM emp + ORDER BY hire_date + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results should have size 20 + + val employees = results.map(_._1) + + // Premier embauché: Sam Turner (2015-06-01, $130k) + employees.foreach { emp => + emp.first_ever_salary shouldBe Some(130000) + } + + // Dernier embauché: Tina Brooks (2021-03-15, $75k) + employees.foreach { emp => + emp.last_ever_salary shouldBe Some(75000) + } + + // Vérifier l'ordre chronologique + val hireDates = employees.map(_.hire_date) + hireDates shouldBe 
hireDates.sorted + + info(s"✅ Global window: first = $$130k, last = $$75k") + } + + // ======================================================================== + // SCROLL: MULTIPLE PARTITIONS + // ======================================================================== + + "Scroll with multiple partition keys" should "compute independently" in { + val config = ScrollConfig(scrollSize = 4) + + val futureResults = client + .scrollAs[EmployeeMultiPartition]( + """ + SELECT + department, + location, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS first_in_dept_loc + FROM emp + WHERE department IN ('Engineering', 'Sales') + ORDER BY department, location, hire_date + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + val employees = results.map(_._1) + + employees + .groupBy(e => (e.department, e.location)) + .foreach { case ((dept, loc), emps) => + val firstValues = emps.flatMap(_.first_in_dept_loc).distinct + firstValues should have size 1 + + info(s"$dept @ $loc: first_salary = ${firstValues.head}, ${emps.size} employees") + } + + // Engineering @ New York: Alice (2019-03-15, $95k) ou Bob (2018-01-10, $120k) + val engNY = employees.filter(e => e.department == "Engineering" && e.location == "New York") + engNY.foreach { emp => + emp.first_in_dept_loc shouldBe Some(120000) // Bob Smith + } + } + + // ======================================================================== + // SCROLL WITH COMPLEX FILTERS + // ======================================================================== + + "Scroll with complex WHERE clause" should "apply all filters before window" in { + val config = ScrollConfig(scrollSize = 5) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + location, + level, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS 
first_salary + FROM emp + WHERE salary > 80000 + AND hire_date >= '2018-01-01' + AND department IN ('Engineering', 'Sales') + ORDER BY department, hire_date + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + val employees = results.map(_._1) + + employees.foreach { emp => + emp.salary should be > 80000 + emp.hire_date should be >= LocalDate.of(2018, 1, 1) + emp.department should (be("Engineering") or be("Sales")) + } + + info(s"✅ Filtered scroll: ${employees.size} employees matching criteria") + } + + // ======================================================================== + // SCROLL: PERFORMANCE AND MONITORING + // ======================================================================== + + "Scroll with performance monitoring" should "track progress and timing" in { + val config = ScrollConfig( + scrollSize = 5, + logEvery = 5 + ) + + val startTime = System.currentTimeMillis() + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_salary + FROM emp + """, + config + ) + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + val duration = System.currentTimeMillis() - startTime + + results should have size 20 + duration should be < 5000L + + val employees = results.map(_._1) + + // Vérifier que toutes les colonnes window sont présentes + employees.foreach { emp => + emp.first_salary shouldBe defined + emp.last_salary shouldBe defined + } + + info(s"✅ Scrolled ${results.size} docs with 4 window functions in ${duration}ms") + info(s" Throughput: ${results.size * 1000 / duration} docs/sec") + } + + // ======================================================================== + // SCROLL: STREAMING WITH TRANSFORMATION + // 
======================================================================== + + "Scroll with stream transformation" should "process results on-the-fly" in { + val config = ScrollConfig(scrollSize = 3) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + ORDER BY department, salary DESC + """, + config + ) + .map { case (emp, scrollId) => + // Transformation: calculer le % vs premier salaire + val pctVsFirst = emp.first_salary.map { first => + (emp.salary.toDouble / first * 100).round.toInt + } + + (emp, pctVsFirst, scrollId) + } + .filter { case (emp, pct, _) => + // Ne garder que les top earners (row_number <= 3) + emp.row_number.exists(_ <= 3) + } + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results.foreach { case (emp, pctVsFirst, _) => + info( + s"${emp.department} - ${emp.name}: $${emp.salary} (${pctVsFirst.getOrElse("N/A")}% vs first)" + ) + } + + // Chaque département devrait avoir max 3 employés + val countPerDept = results.map(_._1).groupBy(_.department).mapValues(_.size) + countPerDept.values.foreach { count => + count should be <= 3 + } + } + + // ======================================================================== + // SCROLL WITH DOWNSTREAM AGGREGATION + // ======================================================================== + + "Scroll with downstream aggregation" should "compute stats from stream" in { + val config = ScrollConfig(scrollSize = 4) + + val futureStats = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + """, + config + ) + .map(_._1) + .runFold(Map.empty[String, (Int, Int, Int)]) { case (acc, emp) => + // Calculer min/max/count par département + val (min, 
max, count) = acc.getOrElse(emp.department, (Int.MaxValue, 0, 0)) + + acc + (emp.department -> ( + math.min(min, emp.salary), + math.max(max, emp.salary), + count + 1 + )) + } + + val stats = Await.result(futureStats, 30.seconds) + + stats should not be empty + + stats.foreach { case (dept, (min, max, count)) => + info(s"$dept: $count employees, salary range: $$${min} - $$${max}") + } + + // Engineering: 7 employees, $75k - $130k + stats("Engineering") shouldBe (75000, 130000, 7) + + // Sales: 6 employees, $70k - $95k + stats("Sales") shouldBe (70000, 95000, 6) + } + + // ======================================================================== + // SCROLL: ERROR HANDLING + // ======================================================================== + + "Scroll with error handling" should "handle failures gracefully" in { + val config = ScrollConfig(scrollSize = 5) + + val futureResults = client + .scrollAs[EmployeeWithWindow]( + """ + SELECT + department, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary + FROM emp + """, + config + ) + .recover { case ex => + info(s"⚠️ Scroll error: ${ex.getMessage}") + (EmployeeWithWindow("", "", 0, LocalDate.now), config.metrics) + } + .filter(_._1.department.nonEmpty) // Filtrer les erreurs + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds) + + results should not be empty + results.foreach { case (emp, _) => + emp.department should not be empty + } + } +} diff --git a/testkit/src/main/scala/app/softnetwork/elastic/model/window/package.scala b/testkit/src/main/scala/app/softnetwork/elastic/model/window/package.scala new file mode 100644 index 00000000..1074e96b --- /dev/null +++ b/testkit/src/main/scala/app/softnetwork/elastic/model/window/package.scala @@ -0,0 +1,86 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.model + +import java.time.LocalDate + +package object window { + + case class Employee( + name: String, + department: String, + location: String, + salary: Int, + hire_date: String, + level: String, + skills: List[String], + id: String = "" + ) + + case class EmployeeWithWindow( + department: String, + name: String, + salary: Int, + hire_date: LocalDate, + location: Option[String] = None, + level: Option[String] = None, + skills: Option[List[String]] = None, + first_salary: Option[Int] = None, + last_salary: Option[Int] = None, + rank: Option[Int] = None, + row_number: Option[Int] = None + ) + + case class DepartmentStats( + department: String, + avg_salary: Double, + max_salary: Int, + min_salary: Int, + employee_count: Long + ) + + case class DepartmentWithWindow( + department: String, + location: Option[String] = None, + avg_salary: Option[Double] = None, + top_earners: Option[List[String]] = None, + first_hire_date: Option[String] = None + ) + + case class EmployeeMinimal( + name: String, + department: String, + salary: Int + ) + + case class EmployeeWithGlobalWindow( + name: String, + salary: Int, + hire_date: String, + first_ever_salary: Option[Int] = None, + last_ever_salary: Option[Int] = None + ) + + case class EmployeeMultiPartition( + department: String, + location: String, + name: String, + salary: Int, + hire_date: String, + first_in_dept_loc: Option[Int] = None + ) +} From d69d691b2febef71c47e680e3dfe2b7737d21eac Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Sun, 23 Nov 2025 09:56:47 +0100 Subject: [PATCH 07/40] to fix COUNT(*) --- .../elastic/sql/bridge/ElasticAggregation.scala | 9 +++++++-- .../scala/app/softnetwork/elastic/client/SearchApi.scala | 4 ++-- .../elastic/sql/bridge/ElasticAggregation.scala | 9 +++++++-- .../softnetwork/elastic/client/WindowFunctionSpec.scala | 3 +-- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 92d81665..1275c072 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -145,10 +145,15 @@ object ElasticAggregation { val _agg = aggType match { case COUNT => + val field = + sourceField match { + case "*" | "_id" | "_index" | "_type" => "_id" + case _ => sourceField + } if (distinct) - cardinalityAgg(aggName, sourceField) + cardinalityAgg(aggName, field) else { - valueCountAgg(aggName, sourceField) + valueCountAgg(aggName, field) } case MIN => aggWithFieldOrScript(minAgg, (name, s) => minAgg(name, sourceField).script(s)) case MAX => aggWithFieldOrScript(maxAgg, (name, s) => maxAgg(name, sourceField).script(s)) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala index 32f89ed3..79bc30b3 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -137,7 +137,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { val indices = elasticQuery.indices.mkString(",") logger.debug( - s"Searching with query \n${sql.getOrElse(query)}\nin indices '$indices'" + s"🔍 Searching with query \n${sql.getOrElse(query)}\nin indices '$indices'" ) 
executeSingleSearch(elasticQuery) match { @@ -236,7 +236,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { ) logger.debug( - s"Multi-searching with query \n${sql.getOrElse(query)}" + s"🔍 Multi-searching with query \n${sql.getOrElse(query)}" ) executeMultiSearch(elasticQueries) match { diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 7a046ada..1313f27a 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -145,10 +145,15 @@ object ElasticAggregation { val _agg = aggType match { case COUNT => + val field = + sourceField match { + case "*" | "_id" | "_index" | "_type" => "_id" + case _ => sourceField + } if (distinct) - cardinalityAgg(aggName, sourceField) + cardinalityAgg(aggName, field) else { - valueCountAgg(aggName, sourceField) + valueCountAgg(aggName, field) } case MIN => aggWithFieldOrScript(minAgg, (name, s) => minAgg(name, sourceField).script(s)) case MAX => aggWithFieldOrScript(maxAgg, (name, s) => maxAgg(name, sourceField).script(s)) diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala index fa27508b..e2e1d87e 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala @@ -463,7 +463,6 @@ trait WindowFunctionSpec COUNT(*) AS employee_count FROM emp GROUP BY department - ORDER BY avg_salary DESC """) results match { @@ -474,7 +473,7 @@ trait WindowFunctionSpec engineering shouldBe defined engineering.get.max_salary shouldBe 130000 // Sam Turner engineering.get.min_salary shouldBe 75000 // Eve Davis - 
engineering.get.employee_count shouldBe 6 + engineering.get.employee_count shouldBe 7 case ElasticFailure(error) => fail(s"Query failed: ${error.message}") From 599694ee0e3ef38e78f90dab941e21d4abf89d8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 24 Nov 2025 11:55:46 +0100 Subject: [PATCH 08/40] add isAggregation and asAggregation functions, fix _source for pure aggregations, update group by and order by to allow functions, update aggregations to allow complex expressions, implements script sort within order by --- .../sql/bridge/ElasticAggregation.scala | 211 ++++++++++++------ .../elastic/sql/bridge/package.scala | 64 +++++- .../elastic/sql/SQLQuerySpec.scala | 60 ++--- .../softnetwork/elastic/client/package.scala | 4 - .../sql/bridge/ElasticAggregation.scala | 211 ++++++++++++------ .../elastic/sql/bridge/package.scala | 67 +++++- .../elastic/sql/SQLQuerySpec.scala | 67 +++--- .../sql/function/aggregate/package.scala | 4 +- .../elastic/sql/function/package.scala | 4 +- .../operator/math/ArithmeticExpression.scala | 1 + .../app/softnetwork/elastic/sql/package.scala | 8 +- .../elastic/sql/parser/GroupByParser.scala | 11 +- .../elastic/sql/parser/OrderByParser.scala | 19 +- .../parser/function/aggregate/package.scala | 14 +- .../elastic/sql/query/GroupBy.scala | 6 +- .../elastic/sql/query/OrderBy.scala | 13 +- .../elastic/sql/query/SQLSearchRequest.scala | 13 +- .../elastic/sql/query/Select.scala | 7 +- .../softnetwork/elastic/sql/query/Where.scala | 18 +- 19 files changed, 551 insertions(+), 251 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 1275c072..c7ac4ba6 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -17,6 +17,7 @@ package 
app.softnetwork.elastic.sql.bridge import app.softnetwork.elastic.sql.PainlessContext +import app.softnetwork.elastic.sql.`type`.SQLTemporal import app.softnetwork.elastic.sql.query.{ Asc, Bucket, @@ -31,6 +32,8 @@ import app.softnetwork.elastic.sql.query.{ } import app.softnetwork.elastic.sql.function._ import app.softnetwork.elastic.sql.function.aggregate._ +import app.softnetwork.elastic.sql.function.time.DateTrunc +import app.softnetwork.elastic.sql.time.TimeUnit import com.sksamuel.elastic4s.ElasticApi.{ avgAgg, bucketSelectorAggregation, @@ -44,11 +47,14 @@ import com.sksamuel.elastic4s.ElasticApi.{ valueCountAgg } import com.sksamuel.elastic4s.requests.script.Script +import com.sksamuel.elastic4s.requests.searches.DateHistogramInterval import com.sksamuel.elastic4s.requests.searches.aggs.{ Aggregation, CardinalityAggregation, + DateHistogramAggregation, ExtendedStatsAggregation, FilterAggregation, + HistogramOrder, NestedAggregation, StatsAggregation, TermsAggregation, @@ -93,7 +99,10 @@ object ElasticAggregation { import sqlAgg._ val sourceField = identifier.path - val direction = bucketsDirection.get(identifier.identifierName) + val direction = + bucketsDirection + .get(identifier.identifierName) + .orElse(bucketsDirection.get(identifier.aliasOrName)) val field = fieldAlias match { case Some(alias) => alias.alias @@ -190,13 +199,13 @@ object ElasticAggregation { sort.order match { case Some(Desc) => th.window match { - case LAST_VALUE => FieldSort(sort.field).asc() - case _ => FieldSort(sort.field).desc() + case LAST_VALUE => FieldSort(sort.field.aliasOrName).asc() + case _ => FieldSort(sort.field.aliasOrName).desc() } case _ => th.window match { - case LAST_VALUE => FieldSort(sort.field).desc() - case _ => FieldSort(sort.field).asc() + case LAST_VALUE => FieldSort(sort.field.aliasOrName).desc() + case _ => FieldSort(sort.field.aliasOrName).asc() } } ) @@ -272,82 +281,152 @@ object ElasticAggregation { having: Option[Criteria], nested: 
Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] - ): Option[TermsAggregation] = { - buckets.reverse.foldLeft(Option.empty[TermsAggregation]) { (current, bucket) => + ): Option[Aggregation] = { + var first = false + val nbBuckets = buckets.size + buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => // Determine the bucketPath of the current bucket val currentBucketPath = bucket.identifier.path - var agg = { - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - termsAgg(bucket.name, currentBucketPath) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case None => - termsAgg(bucket.name, currentBucketPath) + val minDocCount = + if ((first || current.isEmpty) && nbBuckets > 1) { + 0 + } else { + first = true + 1 + } + + var agg: Aggregation = { + bucket.out match { + case _: SQLTemporal => + val functions = bucket.identifier.functions + val interval: Option[DateHistogramInterval] = + if (functions.size == 1) { + functions.head match { + case trunc: DateTrunc => + trunc.unit match { + case TimeUnit.YEARS => Option(DateHistogramInterval.Year) + case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) + case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) + case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) + case TimeUnit.DAYS => Option(DateHistogramInterval.Day) + case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) + case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) + case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) + case _ => None + } + case _ => None + } + } else { + None + } + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .field(currentBucketPath) + .minDocCount(minDocCount) + .order(direction match { + case Asc => HistogramOrder("_key", asc = 
true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .field(currentBucketPath) + .minDocCount(minDocCount) + } + case _ => + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + termsAgg(bucket.name, currentBucketPath) + .minDocCount(minDocCount) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + termsAgg(bucket.name, currentBucketPath) + .minDocCount(minDocCount) + } } } - bucket.size.foreach(s => agg = agg.size(s)) - having match { - case Some(criteria) => - criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = agg.includeRegex(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = agg.includeExactValues(values.toArray) - case _ => - } - criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = agg.excludeRegex(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = agg.excludeExactValues(values.toArray) + agg match { + case termsAgg: TermsAggregation => + bucket.size.foreach(s => agg = termsAgg.size(s)) + having match { + case Some(criteria) => + criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.includeRegex(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.includeExactValues(values.toArray) + case _ => + } + criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.excludeRegex(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = 
termsAgg.excludeExactValues(values.toArray) + case _ => + } case _ => } case _ => } current match { - case Some(subAgg) => Some(agg.copy(subaggs = Seq(subAgg))) + case Some(subAgg) => + agg match { + case termsAgg: TermsAggregation => + agg = termsAgg.subaggs(Seq(subAgg)) + case dateHistogramAgg: DateHistogramAggregation => + agg = dateHistogramAgg.subaggs(Seq(subAgg)) + case _ => + } + Some(agg) case None => - val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => - kv._2 match { - case Asc => TermsOrder(kv._1, asc = true) - case _ => TermsOrder(kv._1, asc = false) + val subaggs = + having match { + case Some(criteria) => + val script = metricSelectorForBucket( + criteria, + nested, + allElasticAggregations + ) + + if (script.nonEmpty) { + val bucketSelector = + bucketSelectorAggregation( + "having_filter", + Script(script), + extractMetricsPathForBucket( + criteria, + nested, + allElasticAggregations + ) + ) + aggregations :+ bucketSelector + } else { + aggregations + } + case None => + aggregations } - } - val withAggregationOrders = - if (aggregationsWithOrder.nonEmpty) - agg.order(aggregationsWithOrder) - else - agg - val withHaving = having match { - case Some(criteria) => - val script = metricSelectorForBucket( - criteria, - nested, - allElasticAggregations - ) - if (script.nonEmpty) { - val bucketSelector = - bucketSelectorAggregation( - "having_filter", - Script(script), - extractMetricsPathForBucket( - criteria, - nested, - allElasticAggregations - ) - ) - withAggregationOrders.copy(subaggs = aggregations :+ bucketSelector) - } else { - withAggregationOrders.copy(subaggs = aggregations) + agg match { + case termsAgg: TermsAggregation => + val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => + kv._2 match { + case Asc => TermsOrder(kv._1, asc = true) + case _ => TermsOrder(kv._1, asc = false) + } } - case None => withAggregationOrders.copy(subaggs = aggregations) + if 
(aggregationsWithOrder.nonEmpty) + agg = termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) + else + agg = termsAgg.copy(subaggs = subaggs) + case dateHistogramAggregation: DateHistogramAggregation => + agg = dateHistogramAggregation.copy(subaggs = subaggs) } - Some(withHaving) + Some(agg) } } } diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index b29f1ce2..68e6c2b7 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -16,7 +16,13 @@ package app.softnetwork.elastic.sql -import app.softnetwork.elastic.sql.`type`.{SQLBigInt, SQLDouble, SQLTemporal, SQLVarchar} +import app.softnetwork.elastic.sql.`type`.{ + SQLBigInt, + SQLDouble, + SQLNumeric, + SQLTemporal, + SQLVarchar +} import app.softnetwork.elastic.sql.function.aggregate.COUNT import app.softnetwork.elastic.sql.function.geo.{Distance, Meters} import app.softnetwork.elastic.sql.operator._ @@ -34,7 +40,7 @@ import com.sksamuel.elastic4s.requests.searches.aggs.{ } import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery import com.sksamuel.elastic4s.requests.searches.queries.{InnerHit, Query} -import com.sksamuel.elastic4s.requests.searches.sort.FieldSort +import com.sksamuel.elastic4s.requests.searches.sort.{FieldSort, ScriptSort, ScriptSortType} import com.sksamuel.elastic4s.requests.searches.{ MultiSearchRequest, SearchBodyBuilderFn, @@ -457,7 +463,7 @@ package object bridge { _search } - _search = scriptFields.filterNot(_.aggregation) match { + _search = scriptFields.filterNot(_.isAggregation) match { case Nil => _search case _ => _search scriptfields scriptFields.map { field => @@ -478,17 +484,55 @@ package object bridge { _search = orderBy match { case Some(o) if aggregates.isEmpty && buckets.isEmpty => - _search sortBy o.sorts.map(sort => - sort.order match { - 
case Some(Desc) => FieldSort(sort.field).desc() - case _ => FieldSort(sort.field).asc() + _search sortBy o.sorts.map { sort => + if (sort.isScriptSort) { + val context = PainlessContext() + val painless = sort.field.painless(Some(context)) + val painlessScript = s"$context$painless" + val script = + sort.out match { + case _: SQLTemporal if !painless.endsWith("toEpochMilli()") => + val parts = painlessScript.split(";").toSeq + if (parts.size > 1) { + val lastPart = parts.last.trim.stripPrefix("return ") + if (lastPart.split(" ").toSeq.size == 1) { + val newLastPart = + s"""($lastPart != null) ? $lastPart.toInstant().toEpochMilli() : null""" + s"${parts.dropRight(1).mkString(";")}; return $newLastPart" + } else { + painlessScript + } + } else { + s"$painlessScript.toInstant().toEpochMilli()" + } + case _ => painlessScript + } + val scriptSort = + ScriptSort( + script = Script(script = script) + .lang("painless") + .scriptType(Source), + scriptSortType = sort.field.out match { + case _: SQLTemporal | _: SQLNumeric => ScriptSortType.Number + case _ => ScriptSortType.String + } + ) + sort.order match { + case Some(Desc) => scriptSort.desc() + case _ => scriptSort.asc() + } + } else { + sort.order match { + case Some(Desc) => FieldSort(sort.field.aliasOrName).desc() + case _ => FieldSort(sort.field.aliasOrName).asc() + } } - ) + } case _ => _search } if (allAggregations.nonEmpty && fields.isEmpty) { - _search size 0 + _search size 0 fetchSource false } else { limit match { case Some(l) => _search limit l.limit from l.offset.map(_.offset).getOrElse(0) @@ -512,7 +556,7 @@ package object bridge { implicit def expressionToQuery(expression: GenericExpression): Query = { import expression._ - if (aggregation) + if (isAggregation) return matchAllQuery() if ( identifier.functions.nonEmpty && (identifier.functions.size > 1 || (identifier.functions.head match { diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala 
b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 1a7651b2..08a52cf6 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -526,12 +526,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "Country": { | "terms": { | "field": "Country", | "exclude": ["USA"], + | "min_doc_count": 1, | "order": { | "_key": "asc" | } @@ -541,6 +542,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "City", | "exclude": ["Berlin"], + | "min_doc_count": 0, | "order": { | "cnt": "desc" | } @@ -709,7 +711,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "min_score": 1.0, - | "_source": true, + | "_source": false, | "aggs": { | "inner_products": { | "nested": { @@ -794,7 +796,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "cat": { | "terms": { | "field": "products.category", - | "size": 10 + | "size": 10, + | "min_doc_count": 1 | }, | "aggs": { | "min_price": { @@ -1005,11 +1008,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "userId": { | "terms": { - | "field": "userId" + | "field": "userId", + | "min_doc_count": 1 | }, | "aggs": { | "lastSeen": { @@ -1049,12 +1053,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "Country": { | "terms": { | "field": "Country", | "exclude": ["USA"], + | "min_doc_count":1, | "order": { | "_key": "asc" | } @@ -1063,7 +1068,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "City": { | "terms": { | "field": "City", - | "exclude": ["Berlin"] + | "exclude": ["Berlin"], + | "min_doc_count":0 | }, | "aggs": { | "cnt": { @@ -1114,14 
+1120,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "Country": { | "terms": { | "field": "Country", - | "exclude": [ - | "USA" - | ], + | "exclude": ["USA"], + | "min_doc_count":1, | "order": { | "_key": "asc" | } @@ -1130,9 +1135,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "City": { | "terms": { | "field": "City", - | "exclude": [ - | "Berlin" - | ] + | "exclude": ["Berlin"], + | "min_doc_count":0 | }, | "aggs": { | "cnt": { @@ -1189,11 +1193,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "identifier": { | "terms": { | "field": "identifier", + | "min_doc_count":1, | "order": { | "ct": "desc" | } @@ -1356,11 +1361,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "identifier": { | "terms": { | "field": "identifier", + | "min_doc_count":1, | "order": { | "ct": "desc" | } @@ -1513,11 +1519,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "identifier": { | "terms": { - | "field": "identifier" + | "field": "identifier", + | "min_doc_count":1 | }, | "aggs": { | "max_diff": { @@ -2088,7 +2095,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("} catch", " } catch") } - it should "handle case function as script field" in { + it should "handle case function as script field" in { // 40 val select: ElasticSearchRequest = SQLQuery(caseWhen) val query = select.query @@ -2579,7 +2586,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("\\(double\\)(\\d)", "(double) $1") } - it should "handle string function as script field and condition" in { + it should "handle string function as script field and condition" in { // 45 val select: 
ElasticSearchRequest = SQLQuery(string) val query = select.query @@ -2754,11 +2761,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | } | }, - | "_source": true, + | "_source": false, | "aggs": { | "dept": { | "terms": { - | "field": "department" + | "field": "department", + | "min_doc_count":1 | }, | "aggs": { | "cnt": { @@ -3185,7 +3193,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("lat,arg", "lat, arg") } - it should "handle between with temporal" in { + it should "handle between with temporal" in { // 50 val select: ElasticSearchRequest = SQLQuery(betweenTemporal) val query = select.query @@ -3603,7 +3611,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "avg_popularity": { | "avg": { @@ -3637,7 +3645,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "comments": { | "nested": { @@ -3698,7 +3706,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "comments": { | "nested": { diff --git a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index c14ea716..4eb2d5ef 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -34,10 +34,6 @@ package object client extends SerializationApi { */ type JSONQuery = String - /** Type alias for JSON results - */ - type JSONResults = String - /** Elastic response case class * @param sql * - the SQL query if any diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 1313f27a..10dc6b3f 100644 
--- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -17,6 +17,7 @@ package app.softnetwork.elastic.sql.bridge import app.softnetwork.elastic.sql.PainlessContext +import app.softnetwork.elastic.sql.`type`.SQLTemporal import app.softnetwork.elastic.sql.query.{ Asc, Bucket, @@ -31,6 +32,8 @@ import app.softnetwork.elastic.sql.query.{ } import app.softnetwork.elastic.sql.function._ import app.softnetwork.elastic.sql.function.aggregate._ +import app.softnetwork.elastic.sql.function.time.DateTrunc +import app.softnetwork.elastic.sql.time.TimeUnit import com.sksamuel.elastic4s.ElasticApi.{ avgAgg, bucketSelectorAggregation, @@ -44,11 +47,14 @@ import com.sksamuel.elastic4s.ElasticApi.{ valueCountAgg } import com.sksamuel.elastic4s.script.Script +import com.sksamuel.elastic4s.searches.DateHistogramInterval import com.sksamuel.elastic4s.searches.aggs.{ Aggregation, CardinalityAggregation, + DateHistogramAggregation, ExtendedStatsAggregation, FilterAggregation, + HistogramOrder, NestedAggregation, StatsAggregation, TermsAggregation, @@ -93,7 +99,10 @@ object ElasticAggregation { import sqlAgg._ val sourceField = identifier.path - val direction = bucketsDirection.get(identifier.identifierName) + val direction = + bucketsDirection + .get(identifier.identifierName) + .orElse(bucketsDirection.get(identifier.aliasOrName)) val field = fieldAlias match { case Some(alias) => alias.alias @@ -190,13 +199,13 @@ object ElasticAggregation { sort.order match { case Some(Desc) => th.window match { - case LAST_VALUE => FieldSort(sort.field).asc() - case _ => FieldSort(sort.field).desc() + case LAST_VALUE => FieldSort(sort.field.aliasOrName).asc() + case _ => FieldSort(sort.field.aliasOrName).desc() } case _ => th.window match { - case LAST_VALUE => FieldSort(sort.field).desc() - case _ => FieldSort(sort.field).asc() + case LAST_VALUE => 
FieldSort(sort.field.aliasOrName).desc() + case _ => FieldSort(sort.field.aliasOrName).asc() } } ) @@ -269,82 +278,152 @@ object ElasticAggregation { having: Option[Criteria], nested: Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] - ): Option[TermsAggregation] = { - buckets.reverse.foldLeft(Option.empty[TermsAggregation]) { (current, bucket) => + ): Option[Aggregation] = { + var first = false + val nbBuckets = buckets.size + buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => // Determine the bucketPath of the current bucket val currentBucketPath = bucket.identifier.path - var agg = { - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - termsAgg(bucket.name, currentBucketPath) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case None => - termsAgg(bucket.name, currentBucketPath) + val minDocCount = + if ((first || current.isEmpty) && nbBuckets > 1) { + 0 + } else { + first = true + 1 + } + + var agg: Aggregation = { + bucket.out match { + case _: SQLTemporal => + val functions = bucket.identifier.functions + val interval: Option[DateHistogramInterval] = + if (functions.size == 1) { + functions.head match { + case trunc: DateTrunc => + trunc.unit match { + case TimeUnit.YEARS => Option(DateHistogramInterval.Year) + case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) + case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) + case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) + case TimeUnit.DAYS => Option(DateHistogramInterval.Day) + case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) + case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) + case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) + case _ => None + } + case _ => None + } + } else { + None + } + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + 
DateHistogramAggregation(bucket.name, interval = interval) + .field(currentBucketPath) + .minDocCount(minDocCount) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, interval = interval) + .field(currentBucketPath) + .minDocCount(minDocCount) + } + case _ => + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + termsAgg(bucket.name, currentBucketPath) + .minDocCount(minDocCount) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + termsAgg(bucket.name, currentBucketPath) + .minDocCount(minDocCount) + } } } - bucket.size.foreach(s => agg = agg.size(s)) - having match { - case Some(criteria) => - criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = agg.include(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = agg.include(values.toArray) - case _ => - } - criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = agg.exclude(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = agg.exclude(values.toArray) + agg match { + case termsAgg: TermsAggregation => + bucket.size.foreach(s => agg = termsAgg.size(s)) + having match { + case Some(criteria) => + criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.include(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.include(values.toArray) + case _ => + } + criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = 
termsAgg.exclude(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.exclude(values.toArray) + case _ => + } case _ => } case _ => } current match { - case Some(subAgg) => Some(agg.copy(subaggs = Seq(subAgg))) + case Some(subAgg) => + agg match { + case termsAgg: TermsAggregation => + agg = termsAgg.subaggs(Seq(subAgg)) + case dateHistogramAgg: DateHistogramAggregation => + agg = dateHistogramAgg.subaggs(Seq(subAgg)) + case _ => + } + Some(agg) case None => - val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => - kv._2 match { - case Asc => TermsOrder(kv._1, asc = true) - case _ => TermsOrder(kv._1, asc = false) + val subaggs = + having match { + case Some(criteria) => + val script = metricSelectorForBucket( + criteria, + nested, + allElasticAggregations + ) + + if (script.nonEmpty) { + val bucketSelector = + bucketSelectorAggregation( + "having_filter", + Script(script), + extractMetricsPathForBucket( + criteria, + nested, + allElasticAggregations + ) + ) + aggregations :+ bucketSelector + } else { + aggregations + } + case None => + aggregations } - } - val withAggregationOrders = - if (aggregationsWithOrder.nonEmpty) - agg.order(aggregationsWithOrder) - else - agg - val withHaving = having match { - case Some(criteria) => - val script = metricSelectorForBucket( - criteria, - nested, - allElasticAggregations - ) - if (script.nonEmpty) { - val bucketSelector = - bucketSelectorAggregation( - "having_filter", - Script(script), - extractMetricsPathForBucket( - criteria, - nested, - allElasticAggregations - ) - ) - withAggregationOrders.copy(subaggs = aggregations :+ bucketSelector) - } else { - withAggregationOrders.copy(subaggs = aggregations) + agg match { + case termsAgg: TermsAggregation => + val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => + kv._2 match { + case Asc => TermsOrder(kv._1, asc = true) + case _ => TermsOrder(kv._1, asc = false) + } } - case None => 
withAggregationOrders.copy(subaggs = aggregations) + if (aggregationsWithOrder.nonEmpty) + agg = termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) + else + agg = termsAgg.copy(subaggs = subaggs) + case dateHistogramAggregation: DateHistogramAggregation => + agg = dateHistogramAggregation.copy(subaggs = subaggs) } - Some(withHaving) + Some(agg) } } } diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index ba3d7dae..22443af2 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -16,7 +16,13 @@ package app.softnetwork.elastic.sql -import app.softnetwork.elastic.sql.`type`.{SQLBigInt, SQLDouble, SQLTemporal, SQLVarchar} +import app.softnetwork.elastic.sql.`type`.{ + SQLBigInt, + SQLDouble, + SQLNumeric, + SQLTemporal, + SQLVarchar +} import app.softnetwork.elastic.sql.function.aggregate.COUNT import app.softnetwork.elastic.sql.function.geo.{Distance, Meters} import app.softnetwork.elastic.sql.operator._ @@ -35,7 +41,7 @@ import com.sksamuel.elastic4s.searches.aggs.{ } import com.sksamuel.elastic4s.searches.queries.{BoolQuery, InnerHit, Query} import com.sksamuel.elastic4s.searches.{MultiSearchRequest, SearchRequest} -import com.sksamuel.elastic4s.searches.sort.FieldSort +import com.sksamuel.elastic4s.searches.sort.{FieldSort, ScriptSort, ScriptSortType} import scala.language.implicitConversions @@ -403,7 +409,8 @@ package object bridge { request.buckets, request.aggregates.map( ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) - ) + ), + request.orderBy.map(_.sorts).getOrElse(Seq.empty) ).minScore(request.score) implicit def requestToSearchRequest(request: SQLSearchRequest): SearchRequest = { @@ -453,7 +460,7 @@ package object bridge { _search } - _search = scriptFields.filterNot(_.aggregation) match { + 
_search = scriptFields.filterNot(_.isAggregation) match { case Nil => _search case _ => _search scriptfields scriptFields.map { field => @@ -474,17 +481,55 @@ package object bridge { _search = orderBy match { case Some(o) if aggregates.isEmpty && buckets.isEmpty => - _search sortBy o.sorts.map(sort => - sort.order match { - case Some(Desc) => FieldSort(sort.field).desc() - case _ => FieldSort(sort.field).asc() + _search sortBy o.sorts.map { sort => + if (sort.isScriptSort) { + val context = PainlessContext() + val painless = sort.field.painless(Some(context)) + val painlessScript = s"$context$painless" + val script = + sort.out match { + case _: SQLTemporal if !painless.endsWith("toEpochMilli()") => + val parts = painlessScript.split(";").toSeq + if (parts.size > 1) { + val lastPart = parts.last.trim.stripPrefix("return ") + if (lastPart.split(" ").toSeq.size == 1) { + val newLastPart = + s"""($lastPart != null) ? $lastPart.toInstant().toEpochMilli() : null""" + s"${parts.dropRight(1).mkString(";")}; return $newLastPart" + } else { + painlessScript + } + } else { + s"$painlessScript.toInstant().toEpochMilli()" + } + case _ => painlessScript + } + val scriptSort = + ScriptSort( + script = Script(script = script) + .lang("painless") + .scriptType(Source), + scriptSortType = sort.field.out match { + case _: SQLTemporal | _: SQLNumeric => ScriptSortType.Number + case _ => ScriptSortType.String + } + ) + sort.order match { + case Some(Desc) => scriptSort.desc() + case _ => scriptSort.asc() + } + } else { + sort.order match { + case Some(Desc) => FieldSort(sort.field.aliasOrName).desc() + case _ => FieldSort(sort.field.aliasOrName).asc() + } } - ) + } case _ => _search } if (allAggregations.nonEmpty || buckets.nonEmpty) { - _search size 0 + _search size 0 fetchSource false } else { limit match { case Some(l) => _search limit l.limit from l.offset.map(_.offset).getOrElse(0) @@ -508,7 +553,7 @@ package object bridge { implicit def expressionToQuery(expression: 
GenericExpression): Query = { import expression._ - if (aggregation) + if (isAggregation) return matchAllQuery() if ( identifier.functions.nonEmpty && (identifier.functions.size > 1 || (identifier.functions.head match { diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 5ada90b9..74a8694c 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -526,12 +526,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "Country": { | "terms": { | "field": "Country", | "exclude": "USA", + | "min_doc_count": 1, | "order": { | "_key": "asc" | } @@ -541,6 +542,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "City", | "exclude": "Berlin", + | "min_doc_count": 0, | "order": { | "cnt": "desc" | } @@ -709,7 +711,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "min_score": 1.0, - | "_source": true, + | "_source": false, | "aggs": { | "inner_products": { | "nested": { @@ -794,7 +796,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "cat": { | "terms": { | "field": "products.category", - | "size": 10 + | "size": 10, + | "min_doc_count": 1 | }, | "aggs": { | "min_price": { @@ -1005,11 +1008,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "userId": { | "terms": { - | "field": "userId" + | "field": "userId", + | "min_doc_count": 1 | }, | "aggs": { | "lastSeen": { @@ -1049,12 +1053,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "Country": { | "terms": { | "field": "Country", | 
"exclude": "USA", + | "min_doc_count":1, | "order": { | "_key": "asc" | } @@ -1063,7 +1068,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "City": { | "terms": { | "field": "City", - | "exclude": "Berlin" + | "exclude": "Berlin", + | "min_doc_count":0 | }, | "aggs": { | "cnt": { @@ -1114,12 +1120,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "Country": { | "terms": { | "field": "Country", | "exclude": "USA", + | "min_doc_count":1, | "order": { | "_key": "asc" | } @@ -1128,7 +1135,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "City": { | "terms": { | "field": "City", - | "exclude": "Berlin" + | "exclude": "Berlin", + | "min_doc_count":0 | }, | "aggs": { | "cnt": { @@ -1185,11 +1193,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "identifier": { | "terms": { | "field": "identifier", + | "min_doc_count":1, | "order": { | "ct": "desc" | } @@ -1352,11 +1361,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "identifier": { | "terms": { | "field": "identifier", + | "min_doc_count":1, | "order": { | "ct": "desc" | } @@ -1509,11 +1519,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "identifier": { | "terms": { - | "field": "identifier" + | "field": "identifier", + | "min_doc_count":1 | }, | "aggs": { | "max_diff": { @@ -2750,11 +2761,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | } | }, - | "_source": true, + | "_source": false, | "aggs": { | "dept": { | "terms": { - | "field": "department" + | "field": "department", + | "min_doc_count":1 | }, | "aggs": { | "cnt": { @@ -3599,7 +3611,7 @@ class SQLQuerySpec extends AnyFlatSpec with 
Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "avg_popularity": { | "avg": { @@ -3633,7 +3645,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "comments": { | "nested": { @@ -3694,7 +3706,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "size": 0, - | "_source": true, + | "_source": false, | "aggs": { | "comments": { | "nested": { @@ -3732,15 +3744,16 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "test" in { val query = """SELECT - | name, - | department, - | salary, - | FIRST_VALUE(salary) OVER ( - | PARTITION BY department - | ORDER BY hire_date - | ) as firstSalaryInDept - | FROM employees - |""".stripMargin + | category, + | SUM(amount) AS totalSales, + | COUNT(*) AS orderCount, + | DATE_TRUNC(sales_date, MONTH) as salesMonth + | FROM orders + | GROUP BY DATE_TRUNC(sales_date, MONTH), category + | ORDER BY DATE_TRUNC(sales_date, MONTH) DESC, category ASC""".stripMargin.replaceAll( + "\n", + " " + ) val select: ElasticSearchRequest = SQLQuery(query) println(select.query) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index ad61c9d1..1b1f0abf 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -23,6 +23,8 @@ package object aggregate { sealed trait AggregateFunction extends Function { def multivalued: Boolean = false + + override def isAggregation: Boolean = true } case object COUNT extends Expr("COUNT") with AggregateFunction @@ -88,7 +90,7 @@ package object aggregate { updated.withFields( fields = request.select.fields .filterNot(field => - field.aggregation || request.bucketNames.keys.toSeq + 
field.isAggregation || request.bucketNames.keys.toSeq .contains(field.identifier.identifierName) ) .filterNot(f => request.excludes.contains(f.sourceField)) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala index 0daf4517..0d0722c7 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala @@ -93,7 +93,9 @@ package object function { lazy val aggregateFunction: Option[AggregateFunction] = aggregations.headOption - lazy val aggregation: Boolean = aggregateFunction.isDefined + override def isAggregation: Boolean = aggregateFunction.isDefined + + override def hasAggregation: Boolean = functions.exists(_.hasAggregation) override def in: SQLType = functions.lastOption.map(_.in).getOrElse(super.in) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala index 24e199c9..0eae1f5e 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala @@ -129,4 +129,5 @@ case class ArithmeticExpression( expr } + override def hasAggregation: Boolean = left.hasAggregation || right.hasAggregation } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala index bac4accb..2fea4f12 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala @@ -67,6 +67,8 @@ package object sql { def nullable: Boolean = !system def dateMathScript: Boolean = false def isTemporal: Boolean = out.isInstanceOf[SQLTemporal] + def isAggregation: Boolean = false + def hasAggregation: Boolean = 
isAggregation } trait TokenValue extends Token { @@ -623,7 +625,7 @@ package object sql { def hasBucket: Boolean = bucket.isDefined def allMetricsPath: Map[String, String] = { - if (aggregation) { + if (isAggregation) { val metricName = aliasOrName Map(metricName -> metricName) } else { @@ -675,7 +677,7 @@ package object sql { } def paramName: String = - if (aggregation && functions.size == 1) s"params.$aliasOrName" + if (isAggregation && functions.size == 1) s"params.$aliasOrName" else if (path.nonEmpty) s"doc['$path'].value" else "" @@ -762,7 +764,7 @@ package object sql { override def param: String = paramName private[this] var _nullable = - this.name.nonEmpty && (!aggregation || functions.size > 1) + this.name.nonEmpty && (!isAggregation || functions.size > 1) protected def nullable_=(b: Boolean): Unit = { _nullable = b diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala index 4b7670b4..3feb6669 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala @@ -16,12 +16,21 @@ package app.softnetwork.elastic.sql.parser +import app.softnetwork.elastic.sql.Identifier import app.softnetwork.elastic.sql.query.{Bucket, GroupBy} trait GroupByParser { self: Parser with WhereParser => - def bucket: PackratParser[Bucket] = (long | identifier) ^^ { i => + def bucketWithFunction: PackratParser[Identifier] = + identifierWithArithmeticExpression | + identifierWithTransformation | + identifierWithAggregation | + identifierWithIntervalFunction | + identifierWithFunction | + identifier + + def bucket: PackratParser[Bucket] = (long | bucketWithFunction) ^^ { i => Bucket(i) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala index 884f0616..74ea9c9d 100644 
--- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala @@ -16,6 +16,7 @@ package app.softnetwork.elastic.sql.parser +import app.softnetwork.elastic.sql.Identifier import app.softnetwork.elastic.sql.function.Function import app.softnetwork.elastic.sql.query.{Asc, Desc, FieldSort, OrderBy} @@ -29,17 +30,17 @@ trait OrderByParser { private def fieldName: PackratParser[String] = """\b(?!(?i)limit\b)[a-zA-Z_][a-zA-Z0-9_]*""".r ^^ (f => f) - def fieldWithFunction: PackratParser[(String, List[Function])] = - rep1sep(sql_function, start) ~ start.? ~ fieldName ~ rep1(end) ^^ { case f ~ _ ~ n ~ _ => - (n, f) - } + def fieldWithFunction: PackratParser[Identifier] = + identifierWithArithmeticExpression | + identifierWithTransformation | + identifierWithAggregation | + identifierWithIntervalFunction | + identifierWithFunction | + identifier def sort: PackratParser[FieldSort] = - (fieldWithFunction | fieldName) ~ (asc | desc).? ^^ { case f ~ o => - f match { - case i: (String, List[Function]) => FieldSort(i._1, o, i._2) - case s: String => FieldSort(s, o, List.empty) - } + fieldWithFunction ~ (asc | desc).? 
^^ { case f ~ o => + FieldSort(f, o) } def orderBy: PackratParser[OrderBy] = OrderBy.regex ~ rep1sep(sort, separator) ^^ { case _ ~ s => diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala index e815d1ad..f32e2cd9 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala @@ -37,10 +37,16 @@ package object aggregate { def aggregate_function: PackratParser[AggregateFunction] = count | min | max | avg | sum + def aggWithFunction: PackratParser[Identifier] = + identifierWithArithmeticExpression | + identifierWithTransformation | + identifierWithIntervalFunction | + identifierWithFunction | + identifier + def identifierWithAggregation: PackratParser[Identifier] = - aggregate_function ~ start ~ (identifierWithFunction | identifierWithIntervalFunction | identifier) ~ end ^^ { - case a ~ _ ~ i ~ _ => - i.withFunctions(a +: i.functions) + aggregate_function ~ start ~ aggWithFunction ~ end ^^ { case a ~ _ ~ i ~ _ => + i.withFunctions(a +: i.functions) } def partition_by: PackratParser[Seq[Identifier]] = @@ -55,7 +61,7 @@ package object aggregate { start ~ identifier ~ end ~ over.? 
^^ { case _ ~ id ~ _ ~ o => o match { case Some((pb, ob)) => (id, pb, ob) - case None => (id, Seq.empty, OrderBy(Seq(FieldSort(id.name, order = None)))) + case None => (id, Seq.empty, OrderBy(Seq(FieldSort(id, order = None)))) } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala index 9544f8a3..1b6e9b2c 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala @@ -16,7 +16,7 @@ package app.softnetwork.elastic.sql.query -import app.softnetwork.elastic.sql.`type`.SQLTypes +import app.softnetwork.elastic.sql.`type`.{SQLType, SQLTypes} import app.softnetwork.elastic.sql.operator._ import app.softnetwork.elastic.sql.{Expr, Identifier, LongValue, TokenRegex, Updateable} @@ -86,6 +86,8 @@ case class Bucket( case None => "" // Root level } } + + override def out: SQLType = identifier.out } object MetricSelectorScript { @@ -118,7 +120,7 @@ object MetricSelectorScript { case _: MultiMatchCriteria => "1 == 1" - case e: Expression if e.aggregation => + case e: Expression if e.isAggregation => // NO FILTERING: the script is generated for all metrics val painless = e.painless(None) e.maybeValue match { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala index 94e111ac..952a08ad 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala @@ -28,17 +28,20 @@ case object Desc extends Expr("DESC") with SortOrder case object Asc extends Expr("ASC") with SortOrder case class FieldSort( - field: String, - order: Option[SortOrder], - functions: List[Function] = List.empty + field: Identifier, + order: Option[SortOrder] ) extends FunctionChain with Updateable { + lazy val functions: List[Function] = 
field.functions lazy val direction: SortOrder = order.getOrElse(Asc) - lazy val name: String = toSQL(field) + lazy val name: String = field.identifierName override def sql: String = s"$name $direction" override def update(request: SQLSearchRequest): FieldSort = this.copy( - field = Identifier(field).update(request).name + field = field.update(request) ) + def isScriptSort: Boolean = functions.nonEmpty && !hasAggregation && field.fieldAlias.isEmpty + + def isBucketScript: Boolean = functions.nonEmpty && !isAggregation && hasAggregation } case class OrderBy(sorts: Seq[FieldSort]) extends Updateable { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala index 10fc24b4..31d4ac01 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala @@ -56,7 +56,7 @@ case class SQLSearchRequest( lazy val nestedFields: Map[String, Seq[Field]] = select.fields - .filterNot(_.aggregation) + .filterNot(_.isAggregation) .filter(_.nested) .groupBy(_.identifier.innerHitsName.getOrElse("")) lazy val nested: Seq[NestedElement] = @@ -117,7 +117,7 @@ case class SQLSearchRequest( lazy val scriptFields: Seq[Field] = select.fields.filter(_.isScriptField) lazy val fields: Seq[String] = { - if (aggregates.isEmpty && buckets.isEmpty) + if (aggregates.isEmpty && buckets.isEmpty && bucketScripts.isEmpty) select.fields .filterNot(_.isScriptField) .filterNot(_.nested) @@ -128,12 +128,12 @@ case class SQLSearchRequest( Seq.empty } - lazy val windowFields: Seq[Field] = select.fields.filter(_.windows.nonEmpty) + lazy val windowFields: Seq[Field] = select.fields.filter(_.isWindow) lazy val windowFunctions: Seq[WindowFunction] = windowFields.flatMap(_.windows) lazy val aggregates: Seq[Field] = - select.fields.filter(_.aggregation).filterNot(_.windows.isDefined) ++ windowFields + 
select.fields.filter(_.isAggregation).filterNot(_.windows.isDefined) ++ windowFields lazy val sqlAggregations: Map[String, SQLAggregation] = aggregates.flatMap(f => SQLAggregation.fromField(f, this)).map(a => a.aggName -> a).toMap @@ -175,7 +175,8 @@ case class SQLSearchRequest( _ <- { // validate that non-aggregated fields are not present when group by is present if (groupBy.isDefined) { - val nonAggregatedFields = select.fields.filterNot(f => f.aggregation || f.isScriptField) + val nonAggregatedFields = + select.fields.filterNot(f => f.hasAggregation) val invalidFields = nonAggregatedFields.filterNot(f => buckets.exists(b => b.name == f.fieldAlias.map(_.alias).getOrElse(f.sourceField.replace(".", "_")) @@ -194,4 +195,6 @@ case class SQLSearchRequest( } } yield () } + + lazy val bucketScripts: Seq[Field] = select.fields.filter(_.isBucketScript) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index 806ce4d7..f30944fa 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -40,7 +40,8 @@ case class Field( with FunctionChain with PainlessScript with DateMathScript { - def isScriptField: Boolean = functions.nonEmpty && !aggregation && identifier.bucket.isEmpty + def isScriptField: Boolean = + functions.nonEmpty && !hasAggregation && identifier.bucket.isEmpty override def sql: String = s"$identifier${asString(fieldAlias)}" lazy val sourceField: String = { if (identifier.nested) { @@ -67,6 +68,8 @@ case class Field( lazy val windows: Option[WindowFunction] = functions.collectFirst { case th: WindowFunction => th } + def isWindow: Boolean = windows.isDefined + def update(request: SQLSearchRequest): Field = { windows match { case Some(th) => @@ -93,6 +96,8 @@ case class Field( lazy val nested: Boolean = identifier.nested lazy val path: String = identifier.path + + def 
isBucketScript: Boolean = functions.nonEmpty && !isAggregation && hasAggregation } case object Except extends Expr("except") with TokenRegex diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala index 939ec421..41af2cfb 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala @@ -353,9 +353,9 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { } } - override lazy val aggregation: Boolean = maybeValue match { - case Some(v: FunctionChain) => identifier.aggregation || v.aggregation - case _ => identifier.aggregation + override lazy val isAggregation: Boolean = maybeValue match { + case Some(v: FunctionChain) => identifier.isAggregation || v.isAggregation + case _ => identifier.isAggregation } def hasBucket: Boolean = identifier.hasBucket || maybeValue.exists { @@ -421,7 +421,7 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { maybeValue.map(v => v.out).getOrElse(SQLTypes.Any) match { case SQLTypes.Varchar => return s"$param.compareTo(${painlessValue(context)}) < 0" - case _: SQLTemporal if !aggregation && !hasBucket => + case _: SQLTemporal if !isAggregation && !hasBucket => return s"$param.isBefore(${painlessValue(context)})" case _ => } @@ -429,7 +429,7 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { maybeValue.map(v => v.out).getOrElse(SQLTypes.Any) match { case SQLTypes.Varchar => return s"$param.compareTo(${painlessValue(context)}) > 0" - case _: SQLTemporal if !aggregation && !hasBucket => + case _: SQLTemporal if !isAggregation && !hasBucket => return s"$param.isAfter(${painlessValue(context)})" case _ => } @@ -437,7 +437,7 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { maybeValue.map(v => v.out).getOrElse(SQLTypes.Any) match { case 
SQLTypes.Varchar => return s"$param.compareTo(${painlessValue(context)}) == 0" - case _: SQLTemporal if !aggregation && !hasBucket => + case _: SQLTemporal if !isAggregation && !hasBucket => return s"$param.isEqual(${painlessValue(context)})" case _ => } @@ -445,7 +445,7 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { maybeValue.map(v => v.out).getOrElse(SQLTypes.Any) match { case SQLTypes.Varchar => return s"$param.compareTo(${painlessValue(context)}) != 0" - case _: SQLTemporal if !aggregation && !hasBucket => + case _: SQLTemporal if !isAggregation && !hasBucket => return s"$param.isEqual(${painlessValue(context)}) == false" case _ => } @@ -453,7 +453,7 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { maybeValue.map(v => v.out).getOrElse(SQLTypes.Any) match { case SQLTypes.Varchar => return s"$param.compareTo(${painlessValue(context)}) >= 0" - case _: SQLTemporal if !aggregation && !hasBucket => + case _: SQLTemporal if !isAggregation && !hasBucket => return s"$param.isBefore(${painlessValue(context)}) == false" case _ => } @@ -461,7 +461,7 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { maybeValue.map(v => v.out).getOrElse(SQLTypes.Any) match { case SQLTypes.Varchar => return s"$param.compareTo(${painlessValue(context)}) <= 0" - case _: SQLTemporal if !aggregation && !hasBucket => + case _: SQLTemporal if !isAggregation && !hasBucket => return s"$param.isAfter(${painlessValue(context)}) == false" case _ => } From 4c4f1244304c7285deb8a997a77c5f0522bf5806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 24 Nov 2025 12:20:23 +0100 Subject: [PATCH 09/40] to fix compilation bug --- .../scala/app/softnetwork/elastic/sql/bridge/package.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala 
b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index 22443af2..c0a30662 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -409,8 +409,8 @@ package object bridge { request.buckets, request.aggregates.map( ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) - ), - request.orderBy.map(_.sorts).getOrElse(Seq.empty) + ) + // request.orderBy.map(_.sorts).getOrElse(Seq.empty) ).minScore(request.score) implicit def requestToSearchRequest(request: SQLSearchRequest): SearchRequest = { From fcfce6a60b80e120468a7e13333f48e8ee892b19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 24 Nov 2025 12:49:13 +0100 Subject: [PATCH 10/40] to fix count using _index --- .../app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala | 2 +- .../app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index c7ac4ba6..2e424b42 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -156,7 +156,7 @@ object ElasticAggregation { case COUNT => val field = sourceField match { - case "*" | "_id" | "_index" | "_type" => "_id" + case "*" | "_id" | "_index" | "_type" => "_index" case _ => sourceField } if (distinct) diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 10dc6b3f..173d5ab2 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ 
b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -156,7 +156,7 @@ object ElasticAggregation { case COUNT => val field = sourceField match { - case "*" | "_id" | "_index" | "_type" => "_id" + case "*" | "_id" | "_index" | "_type" => "_index" case _ => sourceField } if (distinct) From 716b556b18c6587fb89bb2f1731502c4f8ee5667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 24 Nov 2025 13:27:25 +0100 Subject: [PATCH 11/40] to fix unrelated code to java client api --- .../app/softnetwork/elastic/client/java/JavaClientApi.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala index 7cd2634c..a6fc8e2e 100644 --- a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala +++ b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala @@ -48,7 +48,6 @@ import co.elastic.clients.elasticsearch.core.reindex.{Destination, Source => ESS import co.elastic.clients.elasticsearch.core.search.{PointInTimeReference, SearchRequestBody} import co.elastic.clients.elasticsearch.indices.update_aliases.{Action, AddAction, RemoveAction} import co.elastic.clients.elasticsearch.indices.{ExistsRequest => IndexExistsRequest, _} -import co.elastic.clients.elasticsearch.sql.QueryRequest import com.google.gson.JsonParser import _root_.java.io.{IOException, StringReader} @@ -751,11 +750,6 @@ trait JavaClientGetApi extends GetApi with JavaClientHelpers { trait JavaClientSearchApi extends SearchApi with JavaClientHelpers { _: JavaClientCompanion with SerializationApi => - val response = apply().sql().query(new QueryRequest.Builder().query("SELECT 1").build()) - val row = response.rows().get(0) - val data = row.get(0) - data.toJson - override implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = 
implicitly[ElasticSearchRequest](sqlSearch).query From 8a1a7bb814a78c5dbf757e9a43eca1edaa65fc88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 24 Nov 2025 17:38:33 +0100 Subject: [PATCH 12/40] handle buckets with painless script, fix min_doc_count for all buckets --- .../sql/bridge/ElasticAggregation.scala | 98 +++++--- .../elastic/sql/SQLQuerySpec.scala | 213 +++++++++--------- .../sql/bridge/ElasticAggregation.scala | 98 +++++--- .../elastic/sql/SQLQuerySpec.scala | 213 +++++++++--------- .../elastic/sql/function/geo/package.scala | 2 +- .../elastic/sql/function/package.scala | 7 + .../elastic/sql/function/time/package.scala | 4 +- .../app/softnetwork/elastic/sql/package.scala | 4 +- .../elastic/sql/query/GroupBy.scala | 25 +- 9 files changed, 390 insertions(+), 274 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 2e424b42..2bb5b5b6 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -282,18 +282,18 @@ object ElasticAggregation { nested: Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] ): Option[Aggregation] = { - var first = false val nbBuckets = buckets.size buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => // Determine the bucketPath of the current bucket val currentBucketPath = bucket.identifier.path - val minDocCount = - if ((first || current.isEmpty) && nbBuckets > 1) { - 0 + val aggScript = + if (bucket.shouldBeScripted) { + val context = PainlessContext() + val painless = bucket.painless(Some(context)) + Some(Script(s"$context$painless").lang("painless")) } else { - first = true - 1 + None } var agg: Aggregation = { @@ -320,32 +320,74 @@ object ElasticAggregation { } else { None } - 
bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .field(currentBucketPath) - .minDocCount(minDocCount) - .order(direction match { - case Asc => HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + + aggScript match { + case Some(script) => + // Scripted date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .script(script) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .script(script) + .minDocCount(1) + } case _ => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .field(currentBucketPath) - .minDocCount(minDocCount) + // Standard date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .field(currentBucketPath) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .field(currentBucketPath) + .minDocCount(1) + } } + case _ => - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - termsAgg(bucket.name, currentBucketPath) - .minDocCount(minDocCount) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) + aggScript match { + case Some(script) => + // Scripted terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + TermsAggregation(bucket.name) + 
.script(script) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + } case _ => - termsAgg(bucket.name, currentBucketPath) - .minDocCount(minDocCount) + // Standard terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + termsAgg(bucket.name, currentBucketPath) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + termsAgg(bucket.name, currentBucketPath) + .minDocCount(1) + } } } } diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 08a52cf6..701bab0a 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -851,7 +851,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "ct": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); param1" | } | } | }, @@ -874,6 +874,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll(";", "; ") .replaceAll("\\|\\|", " || ") .replaceAll("ChronoUnit", " ChronoUnit") + .replaceAll("==", " == ") } it should "filter with date time and interval" in { @@ -1214,7 +1215,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "field": "createdAt", | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); (param1 == null) ? 
null : LocalDate.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"))" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? null : LocalDate.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"))" | } | } | } @@ -1265,49 +1266,49 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "y": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.withDayOfYear(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.withDayOfYear(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "q": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value); def param2 = param1 != null ? param1.withMonth((((param1.getMonthValue() - 1) / 3) * 3) + 1).withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS) : null; def param3 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param3.format(param2)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value); def param2 = param1 != null ? param1.withMonth((((param1.getMonthValue() - 1) / 3) * 3) + 1).withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS) : null; def param3 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param3.format(param2)" | } | }, | "m": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? 
null : doc['lastUpdated'].value.withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "w": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.with(DayOfWeek.SUNDAY).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.with(DayOfWeek.SUNDAY).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "d": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "h": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.HOURS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? 
null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.HOURS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "m2": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.MINUTES)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.MINUTES)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.SECONDS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.SECONDS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | } | }, @@ -1382,7 +1383,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "field": "createdAt", | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); (param1 == null) ? null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? 
null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" | } | } | } @@ -1434,7 +1435,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss XXX\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss XXX\"); (param1 == null) ? null : param2.format(param1)" | } | } | }, @@ -1480,7 +1481,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "diff": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('updatedAt') || doc['updatedAt'].empty ? null : doc['updatedAt'].value); def param2 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); (param1 == null || param2 == null) ? null : ChronoUnit.DAYS.between(param1, param2)" + | "source": "def param1 = (doc['updatedAt'].size() == 0 ? null : doc['updatedAt'].value); def param2 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null || param2 == null) ? null : ChronoUnit.DAYS.between(param1, param2)" | } | } | }, @@ -1531,7 +1532,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "max": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('updatedAt') || doc['updatedAt'].empty ? null : doc['updatedAt'].value); def param2 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); def param3 = (param2 == null) ? 
null : ZonedDateTime.parse(param2, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")); (param1 == null || param2 == null) ? null : ChronoUnit.DAYS.between(param1, param3)" + | "source": "def param1 = (doc['updatedAt'].size() == 0 ? null : doc['updatedAt'].value); def param2 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); def param3 = (param2 == null) ? null : ZonedDateTime.parse(param2, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")); (param1 == null || param2 == null) ? null : ChronoUnit.DAYS.between(param1, param3)" | } | } | } @@ -1583,7 +1584,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.plus(10, ChronoUnit.DAYS)); param1" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.plus(10, ChronoUnit.DAYS)); param1" | } | } | }, @@ -1634,7 +1635,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.minus(10, ChronoUnit.DAYS)); param1" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.minus(10, ChronoUnit.DAYS)); param1" | } | } | }, @@ -1685,7 +1686,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.plus(10, ChronoUnit.DAYS)); param1" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? 
null : doc['lastUpdated'].value.plus(10, ChronoUnit.DAYS)); param1" | } | } | }, @@ -1736,7 +1737,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.minus(10, ChronoUnit.DAYS)); param1" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.minus(10, ChronoUnit.DAYS)); param1" | } | } | }, @@ -1779,7 +1780,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "flag": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); param1 == null" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); param1 == null" | } | } | }, @@ -1817,7 +1818,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "flag": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); param1 != null" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); param1 != null" | } | } | }, @@ -1917,7 +1918,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 != null ? param1 : param2" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 != null ? 
param1 : param2" | } | } | }, @@ -1967,7 +1968,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")).minus(2, ChronoUnit.DAYS); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param3 != null ? param3 : param4" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")).minus(2, ChronoUnit.DAYS); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param3 != null ? param3 : param4" | } | } | }, @@ -2025,7 +2026,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(2, ChronoUnit.HOURS); try { param3 != null ? param3 : param4 } catch (Exception e) { return null; }" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(2, ChronoUnit.HOURS); try { param3 != null ? 
param3 : param4 } catch (Exception e) { return null; }" | } | }, | "c2": { @@ -2109,7 +2110,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value); def param2 = ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS); def param3 = param1 == null ? false : (param1.isAfter(param2)); def param4 = (!doc.containsKey('lastSeen') || doc['lastSeen'].empty ? null : doc['lastSeen'].value.plus(2, ChronoUnit.DAYS)); def param5 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); param3 ? param1 : param4 != null ? param4 : param5" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value); def param2 = ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS); def param3 = param1 == null ? false : (param1.isAfter(param2)); def param4 = (doc['lastSeen'].size() == 0 ? null : doc['lastSeen'].value.plus(2, ChronoUnit.DAYS)); def param5 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); param3 ? param1 : param4 != null ? param4 : param5" | } | } | }, @@ -2162,7 +2163,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(7, ChronoUnit.DAYS); def param2 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.toLocalDate().minus(3, ChronoUnit.DAYS)); def param3 = (!doc.containsKey('lastSeen') || doc['lastSeen'].empty ? null : doc['lastSeen'].value.toLocalDate().plus(2, ChronoUnit.DAYS)); def param4 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.toLocalDate()); param1 != null && param1.isEqual(param2) ? param2 : param1 != null && param1.isEqual(param3) ? 
param3 : param4" + | "source": "def param1 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(7, ChronoUnit.DAYS); def param2 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.toLocalDate().minus(3, ChronoUnit.DAYS)); def param3 = (doc['lastSeen'].size() == 0 ? null : doc['lastSeen'].value.toLocalDate().plus(2, ChronoUnit.DAYS)); def param4 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); param1 != null && param1.isEqual(param2) ? param2 : param1 != null && param1.isEqual(param3) ? param3 : param4" | } | } | }, @@ -2217,91 +2218,91 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "dom": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_MONTH)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_MONTH)); param1" | } | }, | "dow": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_WEEK)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_WEEK)); param1" | } | }, | "doy": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_YEAR)); param1" | } | }, | "m": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MONTH_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? 
null : doc['createdAt'].value.get(ChronoField.MONTH_OF_YEAR)); param1" | } | }, | "y": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.YEAR)); param1" | } | }, | "h": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.HOUR_OF_DAY)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.HOUR_OF_DAY)); param1" | } | }, | "minutes": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MINUTE_OF_HOUR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MINUTE_OF_HOUR)); param1" | } | }, | "s": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.SECOND_OF_MINUTE)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.SECOND_OF_MINUTE)); param1" | } | }, | "nano": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.NANO_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.NANO_OF_SECOND)); param1" | } | }, | "micro": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? 
null : doc['createdAt'].value.get(ChronoField.MICRO_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MICRO_OF_SECOND)); param1" | } | }, | "milli": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MILLI_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MILLI_OF_SECOND)); param1" | } | }, | "epoch": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.EPOCH_DAY)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.EPOCH_DAY)); param1" | } | }, | "off": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.OFFSET_SECONDS)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.OFFSET_SECONDS)); param1" | } | }, | "w": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR)); param1" | } | }, | "q": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.QUARTER_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? 
null : doc['createdAt'].value.get(java.time.temporal.IsoFields.QUARTER_OF_YEAR)); param1" | } | } | }, @@ -2341,7 +2342,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().get(ChronoField.YEAR); (param1 == null) ? null : (param1 * (param2 - 10)) > 10000" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().get(ChronoField.YEAR); (param1 == null) ? null : (param1 * (param2 - 10)) > 10000" | } | } | } @@ -2352,37 +2353,37 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "add": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 + 1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 + 1)" | } | }, | "sub": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 - 1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 - 1)" | } | }, | "mul": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 * 2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? 
null : (param1 * 2)" | } | }, | "div": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 / 2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 / 2)" | } | }, | "mod": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 % 2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 % 2)" | } | }, | "identifier_mul_identifier2_minus_10": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); def param2 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); def lv0 = ((param1 == null || param2 == null) ? null : (param1 * param2)); (lv0 == null) ? null : (lv0 - 10)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); def lv0 = ((param1 == null || param2 == null) ? null : (param1 * param2)); (lv0 == null) ? null : (lv0 - 10)" | } | } | }, @@ -2430,7 +2431,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.sqrt(param1) > 100.0" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? 
null : Math.sqrt(param1) > 100.0" | } | } | } @@ -2441,109 +2442,109 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "abs_identifier_plus_1_0_mul_2": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); ((param1 == null) ? null : Math.abs(param1) + 1.0) * ((double) 2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); ((param1 == null) ? null : Math.abs(param1) + 1.0) * ((double) 2)" | } | }, | "ceil_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.ceil(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.ceil(param1)" | } | }, | "floor_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.floor(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.floor(param1)" | } | }, | "sqrt_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.sqrt(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.sqrt(param1)" | } | }, | "exp_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.exp(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? 
null : doc['identifier'].value); (param1 == null) ? null : Math.exp(param1)" | } | }, | "log_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.log(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.log(param1)" | } | }, | "log10_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.log10(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.log10(param1)" | } | }, | "pow_identifier_3": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.pow(param1, 3)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.pow(param1, 3)" | } | }, | "round_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); def param2 = Math.pow(10, 0); (param1 == null || param2 == null) ? null : Math.round((param1 * param2) / param2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = Math.pow(10, 0); (param1 == null || param2 == null) ? null : Math.round((param1 * param2) / param2)" | } | }, | "round_identifier_2": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); def param2 = Math.pow(10, 2); (param1 == null || param2 == null) ? 
null : Math.round((param1 * param2) / param2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = Math.pow(10, 2); (param1 == null || param2 == null) ? null : Math.round((param1 * param2) / param2)" | } | }, | "sign_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 > 0 ? 1 : (param1 < 0 ? -1 : 0))" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 > 0 ? 1 : (param1 < 0 ? -1 : 0))" | } | }, | "cos_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.cos(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.cos(param1)" | } | }, | "acos_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.acos(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.acos(param1)" | } | }, | "sin_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.sin(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.sin(param1)" | } | }, | "asin_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? 
null : Math.asin(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.asin(param1)" | } | }, | "tan_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.tan(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.tan(param1)" | } | }, | "atan_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.atan(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.atan(param1)" | } | }, | "atan2_identifier_3_0": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.atan2(param1, 3.0)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.atan2(param1, 3.0)" | } | } | }, @@ -2600,7 +2601,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.trim().length() > 10" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? 
null : param1.trim().length() > 10" | } | } | } @@ -2611,85 +2612,85 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "len": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.length()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.length()" | } | }, | "low": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.toLowerCase()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.toLowerCase()" | } | }, | "upp": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.toUpperCase()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.toUpperCase()" | } | }, | "sub": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(0, Math.min(3, param1.length()))" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(0, Math.min(3, param1.length()))" | } | }, | "tr": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.trim()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? 
null : param1.trim()" | } | }, | "ltr": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replaceAll(\"^\\\\s+\",\"\")" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replaceAll(\"^\\\\s+\",\"\")" | } | }, | "rtr": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replaceAll(\"\\\\s+$\",\"\")" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replaceAll(\"\\\\s+$\",\"\")" | } | }, | "con": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : String.valueOf(param1) + \"_test\" + String.valueOf(1)" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : String.valueOf(param1) + \"_test\" + String.valueOf(1)" | } | }, | "l": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(0, Math.min(5, param1.length()))" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(0, Math.min(5, param1.length()))" | } | }, | "r": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? 
null : param1.substring(param1.length() - Math.min(3, param1.length()))" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(param1.length() - Math.min(3, param1.length()))" | } | }, | "rep": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replace(\"el\", \"le\")" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replace(\"el\", \"le\")" | } | }, | "rev": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : new StringBuilder(param1).reverse().toString()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : new StringBuilder(param1).reverse().toString()" | } | }, | "pos": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.indexOf(\"soft\", 0) + 1" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.indexOf(\"soft\", 0) + 1" | } | }, | "reg": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : java.util.regex.Pattern.compile(\"soft\", java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.MULTILINE).matcher(param1).find()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? 
null : java.util.regex.Pattern.compile(\"soft\", java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.MULTILINE).matcher(param1).find()" | } | } | }, @@ -2757,7 +2758,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "hire_date": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('hire_date') || doc['hire_date'].empty ? null : doc['hire_date'].value.toLocalDate()); param1" + | "source": "def param1 = (doc['hire_date'].size() == 0 ? null : doc['hire_date'].value.toLocalDate()); param1" | } | } | }, @@ -2892,7 +2893,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "ld": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.toLocalDate()); (param1 == null) ? null : param1.withDayOfMonth(param1.lengthOfMonth())" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); (param1 == null) ? null : param1.withDayOfMonth(param1.lengthOfMonth())" | } | } | }, @@ -2948,91 +2949,91 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "y": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.YEAR)); param1" | } | }, | "m": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MONTH_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MONTH_OF_YEAR)); param1" | } | }, | "wd": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? 
null : doc['createdAt'].value); (param1 == null) ? null : (param1.get(ChronoField.DAY_OF_WEEK) + 6) % 7" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? null : (param1.get(ChronoField.DAY_OF_WEEK) + 6) % 7" | } | }, | "yd": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_YEAR)); param1" | } | }, | "d": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_MONTH)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_MONTH)); param1" | } | }, | "h": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.HOUR_OF_DAY)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.HOUR_OF_DAY)); param1" | } | }, | "minutes": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MINUTE_OF_HOUR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MINUTE_OF_HOUR)); param1" | } | }, | "s": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.SECOND_OF_MINUTE)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? 
null : doc['createdAt'].value.get(ChronoField.SECOND_OF_MINUTE)); param1" | } | }, | "nano": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.NANO_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.NANO_OF_SECOND)); param1" | } | }, | "micro": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MICRO_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MICRO_OF_SECOND)); param1" | } | }, | "milli": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MILLI_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MILLI_OF_SECOND)); param1" | } | }, | "epoch": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.EPOCH_DAY)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.EPOCH_DAY)); param1" | } | }, | "off": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.OFFSET_SECONDS)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.OFFSET_SECONDS)); param1" | } | }, | "w": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? 
null : doc['createdAt'].value.get(java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR)); param1" | } | }, | "q": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.QUARTER_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.QUARTER_OF_YEAR)); param1" | } | } | }, @@ -3088,7 +3089,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon)) >= 4000000.0", + | "source": "(def arg0 = (doc['toLocation'].size() == 0 ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon)) >= 4000000.0", | "params": { | "lat": -70.0, | "lon": 40.0 @@ -3112,7 +3113,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('fromLocation') || doc['fromLocation'].empty ? null : doc['fromLocation']); def arg1 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null || arg1 == null) ? null : arg0.arcDistance(arg1.lat, arg1.lon)) < 2000000.0" + | "source": "(def arg0 = (doc['fromLocation'].size() == 0 ? null : doc['fromLocation']); def arg1 = (doc['toLocation'].size() == 0 ? null : doc['toLocation']); (arg0 == null || arg1 == null) ? 
null : arg0.arcDistance(arg1.lat, arg1.lon)) < 2000000.0" | } | } | }, @@ -3131,7 +3132,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "d1": { | "script": { | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", + | "source": "(def arg0 = (doc['toLocation'].size() == 0 ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", | "params": { | "lat": -70.0, | "lon": 40.0 @@ -3141,7 +3142,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "d2": { | "script": { | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('fromLocation') || doc['fromLocation'].empty ? null : doc['fromLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", + | "source": "(def arg0 = (doc['fromLocation'].size() == 0 ? null : doc['fromLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", | "params": { | "lat": -70.0, | "lon": 40.0 @@ -3218,7 +3219,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())) == false)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? 
false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())) == false)" | } | } | }, @@ -3308,7 +3309,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('comments.replies.lastUpdated') || doc['comments.replies.lastUpdated'].empty ? null : doc['comments.replies.lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())))" + | "source": "def param1 = (doc['comments.replies.lastUpdated'].size() == 0 ? null : doc['comments.replies.lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())))" | } | } | } @@ -3406,7 +3407,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('replies.lastUpdated') || doc['replies.lastUpdated'].empty ? null : doc['replies.lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())))" + | "source": "def param1 = (doc['replies.lastUpdated'].size() == 0 ? null : doc['replies.lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())))" | } | } | }, @@ -3513,7 +3514,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? 
null : doc['lastUpdated'].value.toLocalDate()); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 == null ? false : (param1.isBefore(param2))" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.toLocalDate()); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 == null ? false : (param1.isBefore(param2))" | } | } | } diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 173d5ab2..52e21ee0 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -279,18 +279,18 @@ object ElasticAggregation { nested: Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] ): Option[Aggregation] = { - var first = false val nbBuckets = buckets.size buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => // Determine the bucketPath of the current bucket val currentBucketPath = bucket.identifier.path - val minDocCount = - if ((first || current.isEmpty) && nbBuckets > 1) { - 0 + val aggScript = + if (bucket.shouldBeScripted) { + val context = PainlessContext() + val painless = bucket.painless(Some(context)) + Some(Script(s"$context$painless").lang("painless")) } else { - first = true - 1 + None } var agg: Aggregation = { @@ -317,32 +317,74 @@ object ElasticAggregation { } else { None } - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, interval = interval) - .field(currentBucketPath) - .minDocCount(minDocCount) - .order(direction match { - case Asc => HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + + aggScript match { + case Some(script) => + // Scripted date histogram + 
bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, interval = interval) + .script(script) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, interval = interval) + .script(script) + .minDocCount(1) + } case _ => - DateHistogramAggregation(bucket.name, interval = interval) - .field(currentBucketPath) - .minDocCount(minDocCount) + // Standard date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, interval = interval) + .field(currentBucketPath) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, interval = interval) + .field(currentBucketPath) + .minDocCount(1) + } } + case _ => - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - termsAgg(bucket.name, currentBucketPath) - .minDocCount(minDocCount) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) + aggScript match { + case Some(script) => + // Scripted terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + } case _ => - termsAgg(bucket.name, currentBucketPath) - .minDocCount(minDocCount) + // Standard terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + 
termsAgg(bucket.name, currentBucketPath) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + termsAgg(bucket.name, currentBucketPath) + .minDocCount(1) + } } } } diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 74a8694c..268c8a68 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -851,7 +851,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "ct": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); param1" | } | } | }, @@ -874,6 +874,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll(";", "; ") .replaceAll("\\|\\|", " || ") .replaceAll("ChronoUnit", " ChronoUnit") + .replaceAll("==", " == ") } it should "filter with date time and interval" in { @@ -1214,7 +1215,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "field": "createdAt", | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); (param1 == null) ? null : LocalDate.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"))" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? 
null : LocalDate.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"))" | } | } | } @@ -1265,49 +1266,49 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "y": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.withDayOfYear(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.withDayOfYear(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "q": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value); def param2 = param1 != null ? param1.withMonth((((param1.getMonthValue() - 1) / 3) * 3) + 1).withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS) : null; def param3 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param3.format(param2)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value); def param2 = param1 != null ? param1.withMonth((((param1.getMonthValue() - 1) / 3) * 3) + 1).withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS) : null; def param3 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param3.format(param2)" | } | }, | "m": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? 
null : doc['lastUpdated'].value.withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "w": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.with(DayOfWeek.SUNDAY).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.with(DayOfWeek.SUNDAY).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "d": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "h": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.HOURS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.HOURS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? 
null : param2.format(param1)" | } | }, | "m2": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.MINUTES)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.MINUTES)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | }, | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.SECONDS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.truncatedTo(ChronoUnit.SECONDS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"); (param1 == null) ? null : param2.format(param1)" | } | } | }, @@ -1382,7 +1383,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "field": "createdAt", | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); (param1 == null) ? null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? 
null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" | } | } | } @@ -1434,7 +1435,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss XXX\"); (param1 == null) ? null : param2.format(param1)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS)); def param2 = DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss XXX\"); (param1 == null) ? null : param2.format(param1)" | } | } | }, @@ -1480,7 +1481,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "diff": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('updatedAt') || doc['updatedAt'].empty ? null : doc['updatedAt'].value); def param2 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); (param1 == null || param2 == null) ? null : ChronoUnit.DAYS.between(param1, param2)" + | "source": "def param1 = (doc['updatedAt'].size() == 0 ? null : doc['updatedAt'].value); def param2 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null || param2 == null) ? null : ChronoUnit.DAYS.between(param1, param2)" | } | } | }, @@ -1531,7 +1532,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "max": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('updatedAt') || doc['updatedAt'].empty ? null : doc['updatedAt'].value); def param2 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); def param3 = (param2 == null) ? 
null : ZonedDateTime.parse(param2, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")); (param1 == null || param2 == null) ? null : ChronoUnit.DAYS.between(param1, param3)" + | "source": "def param1 = (doc['updatedAt'].size() == 0 ? null : doc['updatedAt'].value); def param2 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); def param3 = (param2 == null) ? null : ZonedDateTime.parse(param2, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")); (param1 == null || param2 == null) ? null : ChronoUnit.DAYS.between(param1, param3)" | } | } | } @@ -1583,7 +1584,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.plus(10, ChronoUnit.DAYS)); param1" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.plus(10, ChronoUnit.DAYS)); param1" | } | } | }, @@ -1634,7 +1635,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.minus(10, ChronoUnit.DAYS)); param1" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.minus(10, ChronoUnit.DAYS)); param1" | } | } | }, @@ -1685,7 +1686,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.plus(10, ChronoUnit.DAYS)); param1" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? 
null : doc['lastUpdated'].value.plus(10, ChronoUnit.DAYS)); param1" | } | } | }, @@ -1736,7 +1737,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "lastSeen": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.minus(10, ChronoUnit.DAYS)); param1" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.minus(10, ChronoUnit.DAYS)); param1" | } | } | }, @@ -1779,7 +1780,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "flag": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); param1 == null" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); param1 == null" | } | } | }, @@ -1817,7 +1818,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "flag": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); param1 != null" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); param1 != null" | } | } | }, @@ -1917,7 +1918,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 != null ? param1 : param2" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 != null ? 
param1 : param2" | } | } | }, @@ -1967,7 +1968,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")).minus(2, ChronoUnit.DAYS); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param3 != null ? param3 : param4" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")).minus(2, ChronoUnit.DAYS); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param3 != null ? param3 : param4" | } | } | }, @@ -2025,7 +2026,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(2, ChronoUnit.HOURS); try { param3 != null ? param3 : param4 } catch (Exception e) { return null; }" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(2, ChronoUnit.HOURS); try { param3 != null ? 
param3 : param4 } catch (Exception e) { return null; }" | } | }, | "c2": { @@ -2109,7 +2110,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value); def param2 = ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS); def param3 = param1 == null ? false : (param1.isAfter(param2)); def param4 = (!doc.containsKey('lastSeen') || doc['lastSeen'].empty ? null : doc['lastSeen'].value.plus(2, ChronoUnit.DAYS)); def param5 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value); param3 ? param1 : param4 != null ? param4 : param5" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value); def param2 = ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS); def param3 = param1 == null ? false : (param1.isAfter(param2)); def param4 = (doc['lastSeen'].size() == 0 ? null : doc['lastSeen'].value.plus(2, ChronoUnit.DAYS)); def param5 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); param3 ? param1 : param4 != null ? param4 : param5" | } | } | }, @@ -2162,7 +2163,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(7, ChronoUnit.DAYS); def param2 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.toLocalDate().minus(3, ChronoUnit.DAYS)); def param3 = (!doc.containsKey('lastSeen') || doc['lastSeen'].empty ? null : doc['lastSeen'].value.toLocalDate().plus(2, ChronoUnit.DAYS)); def param4 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.toLocalDate()); param1 != null && param1.isEqual(param2) ? param2 : param1 != null && param1.isEqual(param3) ? 
param3 : param4" + | "source": "def param1 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(7, ChronoUnit.DAYS); def param2 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.toLocalDate().minus(3, ChronoUnit.DAYS)); def param3 = (doc['lastSeen'].size() == 0 ? null : doc['lastSeen'].value.toLocalDate().plus(2, ChronoUnit.DAYS)); def param4 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); param1 != null && param1.isEqual(param2) ? param2 : param1 != null && param1.isEqual(param3) ? param3 : param4" | } | } | }, @@ -2217,91 +2218,91 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "dom": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_MONTH)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_MONTH)); param1" | } | }, | "dow": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_WEEK)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_WEEK)); param1" | } | }, | "doy": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_YEAR)); param1" | } | }, | "m": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MONTH_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? 
null : doc['createdAt'].value.get(ChronoField.MONTH_OF_YEAR)); param1" | } | }, | "y": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.YEAR)); param1" | } | }, | "h": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.HOUR_OF_DAY)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.HOUR_OF_DAY)); param1" | } | }, | "minutes": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MINUTE_OF_HOUR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MINUTE_OF_HOUR)); param1" | } | }, | "s": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.SECOND_OF_MINUTE)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.SECOND_OF_MINUTE)); param1" | } | }, | "nano": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.NANO_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.NANO_OF_SECOND)); param1" | } | }, | "micro": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? 
null : doc['createdAt'].value.get(ChronoField.MICRO_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MICRO_OF_SECOND)); param1" | } | }, | "milli": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MILLI_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MILLI_OF_SECOND)); param1" | } | }, | "epoch": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.EPOCH_DAY)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.EPOCH_DAY)); param1" | } | }, | "off": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.OFFSET_SECONDS)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.OFFSET_SECONDS)); param1" | } | }, | "w": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR)); param1" | } | }, | "q": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.QUARTER_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? 
null : doc['createdAt'].value.get(java.time.temporal.IsoFields.QUARTER_OF_YEAR)); param1" | } | } | }, @@ -2341,7 +2342,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().get(ChronoField.YEAR); (param1 == null) ? null : (param1 * (param2 - 10)) > 10000" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().get(ChronoField.YEAR); (param1 == null) ? null : (param1 * (param2 - 10)) > 10000" | } | } | } @@ -2352,37 +2353,37 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "add": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 + 1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 + 1)" | } | }, | "sub": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 - 1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 - 1)" | } | }, | "mul": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 * 2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? 
null : (param1 * 2)" | } | }, | "div": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 / 2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 / 2)" | } | }, | "mod": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 % 2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 % 2)" | } | }, | "identifier_mul_identifier2_minus_10": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); def param2 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); def lv0 = ((param1 == null || param2 == null) ? null : (param1 * param2)); (lv0 == null) ? null : (lv0 - 10)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); def lv0 = ((param1 == null || param2 == null) ? null : (param1 * param2)); (lv0 == null) ? null : (lv0 - 10)" | } | } | }, @@ -2430,7 +2431,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.sqrt(param1) > 100.0" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? 
null : Math.sqrt(param1) > 100.0" | } | } | } @@ -2441,109 +2442,109 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "abs_identifier_plus_1_0_mul_2": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); ((param1 == null) ? null : Math.abs(param1) + 1.0) * ((double) 2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); ((param1 == null) ? null : Math.abs(param1) + 1.0) * ((double) 2)" | } | }, | "ceil_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.ceil(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.ceil(param1)" | } | }, | "floor_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.floor(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.floor(param1)" | } | }, | "sqrt_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.sqrt(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.sqrt(param1)" | } | }, | "exp_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.exp(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? 
null : doc['identifier'].value); (param1 == null) ? null : Math.exp(param1)" | } | }, | "log_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.log(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.log(param1)" | } | }, | "log10_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.log10(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.log10(param1)" | } | }, | "pow_identifier_3": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.pow(param1, 3)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.pow(param1, 3)" | } | }, | "round_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); def param2 = Math.pow(10, 0); (param1 == null || param2 == null) ? null : Math.round((param1 * param2) / param2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = Math.pow(10, 0); (param1 == null || param2 == null) ? null : Math.round((param1 * param2) / param2)" | } | }, | "round_identifier_2": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); def param2 = Math.pow(10, 2); (param1 == null || param2 == null) ? 
null : Math.round((param1 * param2) / param2)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = Math.pow(10, 2); (param1 == null || param2 == null) ? null : Math.round((param1 * param2) / param2)" | } | }, | "sign_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : (param1 > 0 ? 1 : (param1 < 0 ? -1 : 0))" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 > 0 ? 1 : (param1 < 0 ? -1 : 0))" | } | }, | "cos_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.cos(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.cos(param1)" | } | }, | "acos_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.acos(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.acos(param1)" | } | }, | "sin_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.sin(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.sin(param1)" | } | }, | "asin_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? 
null : Math.asin(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.asin(param1)" | } | }, | "tan_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.tan(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.tan(param1)" | } | }, | "atan_identifier": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.atan(param1)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.atan(param1)" | } | }, | "atan2_identifier_3_0": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier') || doc['identifier'].empty ? null : doc['identifier'].value); (param1 == null) ? null : Math.atan2(param1, 3.0)" + | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Math.atan2(param1, 3.0)" | } | } | }, @@ -2600,7 +2601,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.trim().length() > 10" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? 
null : param1.trim().length() > 10" | } | } | } @@ -2611,85 +2612,85 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "len": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.length()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.length()" | } | }, | "low": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.toLowerCase()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.toLowerCase()" | } | }, | "upp": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.toUpperCase()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.toUpperCase()" | } | }, | "sub": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(0, Math.min(3, param1.length()))" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(0, Math.min(3, param1.length()))" | } | }, | "tr": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.trim()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? 
null : param1.trim()" | } | }, | "ltr": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replaceAll(\"^\\\\s+\",\"\")" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replaceAll(\"^\\\\s+\",\"\")" | } | }, | "rtr": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replaceAll(\"\\\\s+$\",\"\")" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replaceAll(\"\\\\s+$\",\"\")" | } | }, | "con": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : String.valueOf(param1) + \"_test\" + String.valueOf(1)" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : String.valueOf(param1) + \"_test\" + String.valueOf(1)" | } | }, | "l": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(0, Math.min(5, param1.length()))" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(0, Math.min(5, param1.length()))" | } | }, | "r": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? 
null : param1.substring(param1.length() - Math.min(3, param1.length()))" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.substring(param1.length() - Math.min(3, param1.length()))" | } | }, | "rep": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replace(\"el\", \"le\")" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.replace(\"el\", \"le\")" | } | }, | "rev": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : new StringBuilder(param1).reverse().toString()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : new StringBuilder(param1).reverse().toString()" | } | }, | "pos": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : param1.indexOf(\"soft\", 0) + 1" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? null : param1.indexOf(\"soft\", 0) + 1" | } | }, | "reg": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (param1 == null) ? null : java.util.regex.Pattern.compile(\"soft\", java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.MULTILINE).matcher(param1).find()" + | "source": "def param1 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); (param1 == null) ? 
null : java.util.regex.Pattern.compile(\"soft\", java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.MULTILINE).matcher(param1).find()" | } | } | }, @@ -2757,7 +2758,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "hire_date": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('hire_date') || doc['hire_date'].empty ? null : doc['hire_date'].value.toLocalDate()); param1" + | "source": "def param1 = (doc['hire_date'].size() == 0 ? null : doc['hire_date'].value.toLocalDate()); param1" | } | } | }, @@ -2892,7 +2893,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "ld": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.toLocalDate()); (param1 == null) ? null : param1.withDayOfMonth(param1.lengthOfMonth())" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); (param1 == null) ? null : param1.withDayOfMonth(param1.lengthOfMonth())" | } | } | }, @@ -2948,91 +2949,91 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "y": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.YEAR)); param1" | } | }, | "m": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MONTH_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MONTH_OF_YEAR)); param1" | } | }, | "wd": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? 
null : doc['createdAt'].value); (param1 == null) ? null : (param1.get(ChronoField.DAY_OF_WEEK) + 6) % 7" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? null : (param1.get(ChronoField.DAY_OF_WEEK) + 6) % 7" | } | }, | "yd": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_YEAR)); param1" | } | }, | "d": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_MONTH)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.DAY_OF_MONTH)); param1" | } | }, | "h": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.HOUR_OF_DAY)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.HOUR_OF_DAY)); param1" | } | }, | "minutes": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MINUTE_OF_HOUR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MINUTE_OF_HOUR)); param1" | } | }, | "s": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.SECOND_OF_MINUTE)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? 
null : doc['createdAt'].value.get(ChronoField.SECOND_OF_MINUTE)); param1" | } | }, | "nano": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.NANO_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.NANO_OF_SECOND)); param1" | } | }, | "micro": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MICRO_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MICRO_OF_SECOND)); param1" | } | }, | "milli": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.MILLI_OF_SECOND)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.MILLI_OF_SECOND)); param1" | } | }, | "epoch": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.EPOCH_DAY)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.EPOCH_DAY)); param1" | } | }, | "off": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(ChronoField.OFFSET_SECONDS)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(ChronoField.OFFSET_SECONDS)); param1" | } | }, | "w": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? 
null : doc['createdAt'].value.get(java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR)); param1" | } | }, | "q": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('createdAt') || doc['createdAt'].empty ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.QUARTER_OF_YEAR)); param1" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.get(java.time.temporal.IsoFields.QUARTER_OF_YEAR)); param1" | } | } | }, @@ -3088,7 +3089,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon)) >= 4000000.0", + | "source": "(def arg0 = (doc['toLocation'].size() == 0 ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon)) >= 4000000.0", | "params": { | "lat": -70.0, | "lon": 40.0 @@ -3112,7 +3113,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('fromLocation') || doc['fromLocation'].empty ? null : doc['fromLocation']); def arg1 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null || arg1 == null) ? null : arg0.arcDistance(arg1.lat, arg1.lon)) < 2000000.0" + | "source": "(def arg0 = (doc['fromLocation'].size() == 0 ? null : doc['fromLocation']); def arg1 = (doc['toLocation'].size() == 0 ? null : doc['toLocation']); (arg0 == null || arg1 == null) ? 
null : arg0.arcDistance(arg1.lat, arg1.lon)) < 2000000.0" | } | } | }, @@ -3131,7 +3132,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "d1": { | "script": { | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", + | "source": "(def arg0 = (doc['toLocation'].size() == 0 ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", | "params": { | "lat": -70.0, | "lon": 40.0 @@ -3141,7 +3142,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "d2": { | "script": { | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('fromLocation') || doc['fromLocation'].empty ? null : doc['fromLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", + | "source": "(def arg0 = (doc['fromLocation'].size() == 0 ? null : doc['fromLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", | "params": { | "lat": -70.0, | "lon": 40.0 @@ -3218,7 +3219,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())) == false)" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? 
false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())) == false)" | } | } | }, @@ -3308,7 +3309,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('comments.replies.lastUpdated') || doc['comments.replies.lastUpdated'].empty ? null : doc['comments.replies.lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())))" + | "source": "def param1 = (doc['comments.replies.lastUpdated'].size() == 0 ? null : doc['comments.replies.lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())))" | } | } | } @@ -3406,7 +3407,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('replies.lastUpdated') || doc['replies.lastUpdated'].empty ? null : doc['replies.lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())))" + | "source": "def param1 = (doc['replies.lastUpdated'].size() == 0 ? null : doc['replies.lastUpdated'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); param1 == null ? false : (param1.isBefore(param2.withDayOfMonth(param2.lengthOfMonth())))" | } | } | }, @@ -3513,7 +3514,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "script": { | "script": { | "lang": "painless", - | "source": "def param1 = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? 
null : doc['lastUpdated'].value.toLocalDate()); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 == null ? false : (param1.isBefore(param2))" + | "source": "def param1 = (doc['lastUpdated'].size() == 0 ? null : doc['lastUpdated'].value.toLocalDate()); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 == null ? false : (param1.isBefore(param2))" | } | } | } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/geo/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/geo/package.scala index 4f2d46fb..6f450160 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/geo/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/geo/package.scala @@ -152,7 +152,7 @@ package object geo { identifiers.zipWithIndex .map { case (a, i) => val name = a.name - s"def arg$i = (!doc.containsKey('$name') || doc['$name'].empty ? ${a.nullValue} : doc['$name']);" + s"def arg$i = (doc['$name'].size() == 0 ? 
${a.nullValue} : doc['$name']);" } .mkString(" ") diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala index 0d0722c7..687c20d6 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala @@ -139,6 +139,9 @@ package object function { case f => f } } + + override def shouldBeScripted: Boolean = functions.exists(_.shouldBeScripted) + } trait FunctionN[In <: SQLType, Out <: SQLType] extends Function with PainlessScript { @@ -277,6 +280,8 @@ package object function { override def args: List[PainlessScript] = List(left, right) override def nullable: Boolean = left.nullable || right.nullable + + override def shouldBeScripted: Boolean = left.shouldBeScripted || right.shouldBeScripted } trait TransformFunction[In <: SQLType, Out <: SQLType] extends FunctionN[In, Out] { @@ -312,6 +317,8 @@ package object function { s"$base${painless(context)}" } } + + override def shouldBeScripted: Boolean = true } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala index 8f4d183b..82fee992 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala @@ -23,7 +23,6 @@ import app.softnetwork.elastic.sql.{ Identifier, LiteralParam, PainlessContext, - PainlessParam, PainlessScript, StringValue, TokenRegex @@ -269,6 +268,9 @@ package object time { case _ => super.toPainlessCall(callArgs, context) } } + + override def shouldBeScripted: Boolean = false + } case object Extract extends Expr("EXTRACT") with TokenRegex with PainlessScript { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala index 
2fea4f12..bb3a9ace 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala @@ -69,6 +69,7 @@ package object sql { def isTemporal: Boolean = out.isInstanceOf[SQLTemporal] def isAggregation: Boolean = false def hasAggregation: Boolean = isAggregation + def shouldBeScripted: Boolean = false } trait TokenValue extends Token { @@ -735,8 +736,7 @@ package object sql { def checkNotNull: String = if (path.isEmpty) "" else - s"(!doc.containsKey('$path') || doc['$path'].empty ? $nullValue : doc['$path'].value${painlessMethods - .mkString("")})" + s"(doc['$path'].size() == 0 ? $nullValue : doc['$path'].value${painlessMethods.mkString("")})" override def painless(context: Option[PainlessContext]): String = { val base = diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala index 1b6e9b2c..710d4af5 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala @@ -18,7 +18,15 @@ package app.softnetwork.elastic.sql.query import app.softnetwork.elastic.sql.`type`.{SQLType, SQLTypes} import app.softnetwork.elastic.sql.operator._ -import app.softnetwork.elastic.sql.{Expr, Identifier, LongValue, TokenRegex, Updateable} +import app.softnetwork.elastic.sql.{ + Expr, + Identifier, + LongValue, + PainlessContext, + PainlessScript, + TokenRegex, + Updateable +} case object GroupBy extends Expr("GROUP BY") with TokenRegex @@ -45,7 +53,8 @@ case class GroupBy(buckets: Seq[Bucket]) extends Updateable { case class Bucket( identifier: Identifier, size: Option[Int] = None -) extends Updateable { +) extends Updateable + with PainlessScript { override def sql: String = s"$identifier" def update(request: SQLSearchRequest): Bucket = { identifier.functions.headOption match { @@ -88,6 +97,18 @@ case class Bucket( } override def out: 
SQLType = identifier.out + + override def shouldBeScripted: Boolean = identifier.shouldBeScripted + + /** Generate painless script for this token + * + * @param context + * the painless context + * @return + * the painless script + */ + override def painless(context: Option[PainlessContext]): String = + identifier.painless(context) } object MetricSelectorScript { From 8c9c744374bc9843b26b443a955b86822569aa0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 24 Nov 2025 17:46:02 +0100 Subject: [PATCH 13/40] fix sql query specifications --- .../softnetwork/elastic/sql/SQLQuerySpec.scala | 18 +++++++++--------- .../softnetwork/elastic/sql/SQLQuerySpec.scala | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 701bab0a..9e12db32 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -542,7 +542,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "City", | "exclude": ["Berlin"], - | "min_doc_count": 0, + | "min_doc_count": 1, | "order": { | "cnt": "desc" | } @@ -1060,7 +1060,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "Country", | "exclude": ["USA"], - | "min_doc_count":1, + | "min_doc_count": 1, | "order": { | "_key": "asc" | } @@ -1070,7 +1070,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "City", | "exclude": ["Berlin"], - | "min_doc_count":0 + | "min_doc_count": 1 | }, | "aggs": { | "cnt": { @@ -1127,7 +1127,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "Country", | "exclude": ["USA"], - | "min_doc_count":1, + | "min_doc_count": 1, | "order": { | "_key": "asc" | } @@ -1137,7 +1137,7 @@ class SQLQuerySpec extends 
AnyFlatSpec with Matchers { | "terms": { | "field": "City", | "exclude": ["Berlin"], - | "min_doc_count":0 + | "min_doc_count": 1 | }, | "aggs": { | "cnt": { @@ -1199,7 +1199,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier": { | "terms": { | "field": "identifier", - | "min_doc_count":1, + | "min_doc_count": 1, | "order": { | "ct": "desc" | } @@ -1367,7 +1367,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier": { | "terms": { | "field": "identifier", - | "min_doc_count":1, + | "min_doc_count": 1, | "order": { | "ct": "desc" | } @@ -1525,7 +1525,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier": { | "terms": { | "field": "identifier", - | "min_doc_count":1 + | "min_doc_count": 1 | }, | "aggs": { | "max_diff": { @@ -2767,7 +2767,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "dept": { | "terms": { | "field": "department", - | "min_doc_count":1 + | "min_doc_count": 1 | }, | "aggs": { | "cnt": { diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 268c8a68..251a619c 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -542,7 +542,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "City", | "exclude": "Berlin", - | "min_doc_count": 0, + | "min_doc_count": 1, | "order": { | "cnt": "desc" | } @@ -1060,7 +1060,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "Country", | "exclude": "USA", - | "min_doc_count":1, + | "min_doc_count": 1, | "order": { | "_key": "asc" | } @@ -1070,7 +1070,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "City", | "exclude": "Berlin", - | "min_doc_count":0 + | "min_doc_count": 1 | }, | "aggs": { | "cnt": { @@ -1127,7 +1127,7 @@ class 
SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "Country", | "exclude": "USA", - | "min_doc_count":1, + | "min_doc_count": 1, | "order": { | "_key": "asc" | } @@ -1137,7 +1137,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "terms": { | "field": "City", | "exclude": "Berlin", - | "min_doc_count":0 + | "min_doc_count": 1 | }, | "aggs": { | "cnt": { @@ -1199,7 +1199,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier": { | "terms": { | "field": "identifier", - | "min_doc_count":1, + | "min_doc_count": 1, | "order": { | "ct": "desc" | } @@ -1367,7 +1367,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier": { | "terms": { | "field": "identifier", - | "min_doc_count":1, + | "min_doc_count": 1, | "order": { | "ct": "desc" | } @@ -1525,7 +1525,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier": { | "terms": { | "field": "identifier", - | "min_doc_count":1 + | "min_doc_count": 1 | }, | "aggs": { | "max_diff": { @@ -2767,7 +2767,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "dept": { | "terms": { | "field": "department", - | "min_doc_count":1 + | "min_doc_count": 1 | }, | "aggs": { | "cnt": { From ece2def4a6eb405ff80a37e23dfb6cfcc3229742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 24 Nov 2025 18:39:48 +0100 Subject: [PATCH 14/40] fix sql script fields with aggregations, shouldBeScripted with arithmetic expressions, coalesce painless script --- .../elastic/sql/bridge/ElasticAggregation.scala | 1 - .../softnetwork/elastic/sql/SQLQuerySpec.scala | 17 ++++++----------- .../elastic/sql/bridge/ElasticAggregation.scala | 1 - .../softnetwork/elastic/sql/SQLQuerySpec.scala | 17 ++++++----------- .../elastic/sql/function/cond/package.scala | 4 ++-- .../operator/math/ArithmeticExpression.scala | 2 ++ .../elastic/sql/query/SQLSearchRequest.scala | 9 +++++++-- 7 files changed, 23 insertions(+), 28 deletions(-) diff --git 
a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 2bb5b5b6..8ed1fb5e 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -282,7 +282,6 @@ object ElasticAggregation { nested: Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] ): Option[Aggregation] = { - val nbBuckets = buckets.size buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => // Determine the bucketPath of the current bucket val currentBucketPath = bucket.identifier.path diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 9e12db32..f0a07bd1 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -1918,7 +1918,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 != null ? param1 : param2" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); (param1 != null ? 
param1 : param2)" | } | } | }, @@ -1952,6 +1952,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("ChronoUnit", " ChronoUnit") .replaceAll("=ZonedDateTime", " = ZonedDateTime") .replaceAll(":ZonedDateTime", " : ZonedDateTime") + .replaceAll(";\\(param", "; (param") } it should "handle nullif function as script field" in { @@ -1968,7 +1969,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")).minus(2, ChronoUnit.DAYS); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param3 != null ? param3 : param4" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")).minus(2, ChronoUnit.DAYS); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); (param3 != null ? param3 : param4)" | } | } | }, @@ -2010,6 +2011,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll(",DateTimeFormatter", ", DateTimeFormatter") .replaceAll("=ZonedDateTime", " = ZonedDateTime") .replaceAll(":ZonedDateTime", " : ZonedDateTime") + .replaceAll(";\\(param", "; (param") } it should "handle cast function as script field" in { @@ -2026,7 +2028,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); def param3 = param1 == null || param1.isEqual(param2) ? 
null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(2, ChronoUnit.HOURS); try { param3 != null ? param3 : param4 } catch (Exception e) { return null; }" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(2, ChronoUnit.HOURS); try { (param3 != null ? param3 : param4) } catch (Exception e) { return null; }" | } | }, | "c2": { @@ -2094,6 +2096,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll(":ZonedDateTime", " : ZonedDateTime") .replaceAll("try \\{", "try { ") .replaceAll("} catch", " } catch") + .replaceAll(";\\(param", "; (param") } it should "handle case function as script field" in { // 40 @@ -2754,14 +2757,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "script_fields": { - | "hire_date": { - | "script": { - | "lang": "painless", - | "source": "def param1 = (doc['hire_date'].size() == 0 ? 
null : doc['hire_date'].value.toLocalDate()); param1" - | } - | } - | }, | "_source": false, | "aggs": { | "dept": { diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 52e21ee0..b2bc13bb 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -279,7 +279,6 @@ object ElasticAggregation { nested: Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] ): Option[Aggregation] = { - val nbBuckets = buckets.size buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => // Determine the bucketPath of the current bucket val currentBucketPath = bucket.identifier.path diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 251a619c..133cde4c 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -1918,7 +1918,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param1 != null ? param1 : param2" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.minus(35, ChronoUnit.MINUTES)); def param2 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); (param1 != null ? 
param1 : param2)" | } | } | }, @@ -1952,6 +1952,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("ChronoUnit", " ChronoUnit") .replaceAll("=ZonedDateTime", " = ZonedDateTime") .replaceAll(":ZonedDateTime", " : ZonedDateTime") + .replaceAll(";\\(param", "; (param") } it should "handle nullif function as script field" in { @@ -1968,7 +1969,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")).minus(2, ChronoUnit.DAYS); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); param3 != null ? param3 : param4" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")).minus(2, ChronoUnit.DAYS); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate(); (param3 != null ? param3 : param4)" | } | } | }, @@ -2010,6 +2011,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll(",DateTimeFormatter", ", DateTimeFormatter") .replaceAll("=ZonedDateTime", " = ZonedDateTime") .replaceAll(":ZonedDateTime", " : ZonedDateTime") + .replaceAll(";\\(param", "; (param") } it should "handle cast function as script field" in { @@ -2026,7 +2028,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "c": { | "script": { | "lang": "painless", - | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); def param3 = param1 == null || param1.isEqual(param2) ? 
null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(2, ChronoUnit.HOURS); try { param3 != null ? param3 : param4 } catch (Exception e) { return null; }" + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value.toLocalDate()); def param2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern(\"yyyy-MM-dd\")); def param3 = param1 == null || param1.isEqual(param2) ? null : param1; def param4 = ZonedDateTime.now(ZoneId.of('Z')).toLocalDate().minus(2, ChronoUnit.HOURS); try { (param3 != null ? param3 : param4) } catch (Exception e) { return null; }" | } | }, | "c2": { @@ -2094,6 +2096,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll(":ZonedDateTime", " : ZonedDateTime") .replaceAll("try \\{", "try { ") .replaceAll("} catch", " } catch") + .replaceAll(";\\(param", "; (param") } it should "handle case function as script field" in { // 40 @@ -2754,14 +2757,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all": {} | }, | "size": 0, - | "script_fields": { - | "hire_date": { - | "script": { - | "lang": "painless", - | "source": "def param1 = (doc['hire_date'].size() == 0 ? null : doc['hire_date'].value.toLocalDate()); param1" - | } - | } - | }, | "_source": false, | "aggs": { | "dept": { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/cond/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/cond/package.scala index 9670a497..0c432619 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/cond/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/cond/package.scala @@ -138,9 +138,9 @@ package object cond { callArgs .take(values.length - 1) .map { arg => - s"${arg.trim} != null ? ${arg.trim}" // TODO check when value is nullable and has functions + s"(${arg.trim} != null ? 
${arg.trim}" // TODO check when value is nullable and has functions } - .mkString(" : ") + s" : ${callArgs.last}" + .mkString(" : ") + s" : ${callArgs.last})" } override def nullable: Boolean = values.forall(_.nullable) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala index 0eae1f5e..d97f4bc3 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala @@ -130,4 +130,6 @@ case class ArithmeticExpression( } override def hasAggregation: Boolean = left.hasAggregation || right.hasAggregation + + override def shouldBeScripted: Boolean = true } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala index 31d4ac01..b711a54f 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala @@ -38,7 +38,7 @@ case class SQLSearchRequest( lazy val bucketNames: Map[String, Bucket] = buckets.flatMap { b => val name = b.identifier.identifierName "\\d+".r.findFirstIn(name) match { - case Some(n) => + case Some(n) if name.trim.split(" ").length == 1 => val identifier = select.fields(n.toInt - 1).identifier val updated = b.copy(identifier = select.fields(n.toInt - 1).identifier) Map( @@ -114,7 +114,12 @@ case class SQLSearchRequest( ) } - lazy val scriptFields: Seq[Field] = select.fields.filter(_.isScriptField) + lazy val scriptFields: Seq[Field] = { + if (aggregates.nonEmpty) + Seq.empty + else + select.fields.filter(_.isScriptField) + } lazy val fields: Seq[String] = { if (aggregates.isEmpty && buckets.isEmpty && bucketScripts.isEmpty) From 313b82a2ecf9316681faf9c11b194d5427f8b178 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Wed, 26 Nov 2025 15:12:44 +0100 Subject: [PATCH 15/40] fix param and metric name for count all --- .../app/softnetwork/elastic/sql/package.scala | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala index bb3a9ace..420db60d 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala @@ -16,7 +16,7 @@ package app.softnetwork.elastic -import app.softnetwork.elastic.sql.function.aggregate.{MAX, MIN} +import app.softnetwork.elastic.sql.function.aggregate.{COUNT, MAX, MIN} import app.softnetwork.elastic.sql.function.geo.DistanceUnit import app.softnetwork.elastic.sql.function.time.CurrentFunction import app.softnetwork.elastic.sql.operator._ @@ -625,12 +625,10 @@ package object sql { def bucket: Option[Bucket] def hasBucket: Boolean = bucket.isDefined - def allMetricsPath: Map[String, String] = { - if (isAggregation) { - val metricName = aliasOrName - Map(metricName -> metricName) - } else { - Map.empty + lazy val allMetricsPath: Map[String, String] = { + metricName match { + case Some(name) => Map(name -> name) + case _ => Map.empty } } @@ -667,7 +665,7 @@ package object sql { lazy val aliasOrName: String = fieldAlias.getOrElse(name) - def path: String = + lazy val path: String = nestedElement match { case Some(ne) => name.split("\\.") match { @@ -677,13 +675,32 @@ package object sql { case None => name } - def paramName: String = - if (isAggregation && functions.size == 1) s"params.$aliasOrName" + lazy val paramName: String = + if (isAggregation && functions.size == 1) s"params.${metricName.getOrElse(aliasOrName)}" else if (path.nonEmpty) s"doc['$path'].value" else "" - def script: Option[String] = + lazy val metricName: Option[String] = + aggregateFunction match { + case 
Some(af) => + af match { + case COUNT => + aliasOrName match { + case "*" => + if (distinct) { + Some(s"count_distinct_all") + } else { + Some(s"count_all") + } + case _ => Some(aliasOrName) + } + case _ => Some(aliasOrName) + } + case _ => None + } + + lazy val script: Option[String] = if (isTemporal) { var orderedFunctions = FunctionUtils.transformFunctions(this).reverse @@ -777,7 +794,7 @@ package object sql { this } - override def value: String = + override lazy val value: String = script match { case Some(s) => s case _ => painless(None) From 6e50bb1eec45f4499760875a7e4cfedd1f53ec12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Wed, 26 Nov 2025 15:55:05 +0100 Subject: [PATCH 16/40] add support for bucket script --- .../sql/bridge/ElasticAggregation.scala | 108 +++++++++++++--- .../sql/bridge/ElasticSearchRequest.scala | 6 +- .../elastic/sql/bridge/package.scala | 25 ++-- .../sql/bridge/ElasticAggregation.scala | 105 +++++++++++++--- .../sql/bridge/ElasticSearchRequest.scala | 6 +- .../elastic/sql/bridge/package.scala | 24 ++-- .../sql/function/aggregate/package.scala | 32 +++++ .../elastic/sql/function/package.scala | 59 ++++++++- .../operator/math/ArithmeticExpression.scala | 3 - .../elastic/sql/query/GroupBy.scala | 4 + .../elastic/sql/query/SQLSearchRequest.scala | 7 +- .../elastic/sql/query/Select.scala | 119 ++++++++++-------- .../softnetwork/elastic/sql/query/Where.scala | 9 +- 13 files changed, 391 insertions(+), 116 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 8ed1fb5e..0948f543 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -28,6 +28,7 @@ import app.softnetwork.elastic.sql.query.{ MetricSelectorScript, NestedElement, NestedElements, + 
SQLAggregation, SortOrder } import app.softnetwork.elastic.sql.function._ @@ -36,6 +37,7 @@ import app.softnetwork.elastic.sql.function.time.DateTrunc import app.softnetwork.elastic.sql.time.TimeUnit import com.sksamuel.elastic4s.ElasticApi.{ avgAgg, + bucketScriptAggregation, bucketSelectorAggregation, cardinalityAgg, maxAgg, @@ -49,6 +51,7 @@ import com.sksamuel.elastic4s.ElasticApi.{ import com.sksamuel.elastic4s.requests.script.Script import com.sksamuel.elastic4s.requests.searches.DateHistogramInterval import com.sksamuel.elastic4s.requests.searches.aggs.{ + AbstractAggregation, Aggregation, CardinalityAggregation, DateHistogramAggregation, @@ -74,7 +77,7 @@ case class ElasticAggregation( nestedAgg: Option[NestedAggregation] = None, filteredAgg: Option[FilterAggregation] = None, aggType: AggregateFunction, - agg: Aggregation, + agg: AbstractAggregation, direction: Option[SortOrder] = None, nestedElement: Option[NestedElement] = None ) { @@ -94,7 +97,8 @@ object ElasticAggregation { def apply( sqlAgg: Field, having: Option[Criteria], - bucketsDirection: Map[String, SortOrder] + bucketsDirection: Map[String, SortOrder], + allAggregations: Map[String, SQLAggregation] ): ElasticAggregation = { import sqlAgg._ val sourceField = identifier.path @@ -111,9 +115,14 @@ object ElasticAggregation { val distinct = identifier.distinct - val aggType = aggregateFunction.getOrElse( - throw new IllegalArgumentException("Aggregation function is required") - ) + val aggType = { + if (isBucketScript) { + BucketScriptAggregation(identifier) + } else + aggregateFunction.getOrElse( + throw new IllegalArgumentException("Aggregation function is required") + ) + } val aggName = { if (fieldAlias.isDefined) @@ -135,7 +144,8 @@ object ElasticAggregation { val (aggFuncs, transformFuncs) = FunctionUtils.aggregateAndTransformFunctions(identifier) - require(aggFuncs.size == 1, s"Multiple aggregate functions not supported: $aggFuncs") + if (!isBucketScript) + require(aggFuncs.size == 1, 
s"Multiple aggregate functions not supported: $aggFuncs") def aggWithFieldOrScript( buildField: (String, String) => Aggregation, @@ -171,9 +181,8 @@ object ElasticAggregation { case th: WindowFunction => val limit = { th match { - case _: LastValue => 1 - // case _: FirstValue => 1 - case _ => th.limit.map(_.limit).getOrElse(1) + case _: LastValue | _: FirstValue => Some(1) + case _ => th.limit.map(_.limit) } } val topHits = @@ -193,9 +202,9 @@ object ElasticAggregation { .groupBy(_.sourceField) .map(_._2.head) .map(f => f.sourceField -> Script(f.painless(None)).lang("painless")) - .toMap - ) - .size(limit) sortBy th.orderBy.sorts.map(sort => + .toMap, + size = limit + ) sortBy th.orderBy.sorts.map(sort => sort.order match { case Some(Desc) => th.window match { @@ -209,10 +218,25 @@ object ElasticAggregation { } } ) - /*th.fields.filter(_.isScriptField).foldLeft(topHits) { (agg, f) => - agg.script(f.sourceField, Script(f.painless, lang = Some("painless"))) - }*/ topHits + case script: BucketScriptAggregation => + val params = allAggregations.get(aggName) match { + case Some(sqlAgg) => + sqlAgg.aggType match { + case bsa: BucketScriptAggregation => + extractMetricsPathForBucketScript(bsa, allAggregations.values.toSeq) + case _ => Map.empty + } + case None => Map.empty + } + val painless = script.identifier.painless(None) + bucketScriptAggregation( + aggName, + Script(s"$painless").lang("painless"), + params.toMap + ) + case _ => + throw new IllegalArgumentException(s"Unsupported aggregation type: $aggType") } val nestedElement = identifier.nestedElement @@ -276,7 +300,7 @@ object ElasticAggregation { def buildBuckets( buckets: Seq[Bucket], bucketsDirection: Map[String, SortOrder], - aggregations: Seq[Aggregation], + aggregations: Seq[AbstractAggregation], aggregationsDirection: Map[String, SortOrder], having: Option[Criteria], nested: Option[NestedElement], @@ -287,7 +311,7 @@ object ElasticAggregation { val currentBucketPath = bucket.identifier.path val aggScript = 
- if (bucket.shouldBeScripted) { + if (!bucket.isBucketScript && bucket.shouldBeScripted) { val context = PainlessContext() val painless = bucket.painless(Some(context)) Some(Script(s"$context$painless").lang("painless")) @@ -579,6 +603,54 @@ object ElasticAggregation { } } + def extractMetricsPathForBucketScript( + bucketScriptAggregation: BucketScriptAggregation, + allAggregations: Seq[SQLAggregation] + ): Map[String, String] = { + val currentBucketPath = + bucketScriptAggregation.identifier.nestedElement.map(_.bucketPath).getOrElse("") + // Extract ALL metrics paths + val allMetricsPaths = bucketScriptAggregation.params.keys + val result = + allMetricsPaths.flatMap { metricName => + allAggregations.find(agg => agg.aggName == metricName || agg.field == metricName) match { + case Some(sqlAgg) => + val metricBucketPath = sqlAgg.nestedElement + .map(_.bucketPath) + .getOrElse("") + if (metricBucketPath == currentBucketPath) { + // Metric of the same level + Some(metricName -> metricName) + } else if (isDirectChild(metricBucketPath, currentBucketPath)) { + // Metric of a direct child + // CHECK if it is a "global" metric (cardinality, etc.) or a bucket metric (avg, sum, etc.) 
+ val isGlobalMetric = sqlAgg.isGlobalMetric + + if (isGlobalMetric) { + // Global metric: can be referenced from the parent + val childNestedName = sqlAgg.nestedElement + .map(_.innerHitsName) + .getOrElse("") + // println( + // s"[DEBUG extractMetricsPath] Direct child (global metric): $metricName -> $childNestedName>$metricName" + // ) + Some(metricName -> s"$childNestedName>$metricName") + } else { + // Bucket metric: cannot be referenced from the parent + // println( + // s"[DEBUG extractMetricsPath] Direct child (bucket metric): $metricName -> SKIP (bucket-level metric)" + // ) + None + } + } else { + None + } + case _ => None + } + } + result.toMap + } + /** Extracts the buckets_path for a given bucket */ def extractMetricsPathForBucket( @@ -596,7 +668,7 @@ object ElasticAggregation { // println(s"[DEBUG extractMetricsPath] allMetricsPaths = $allMetricsPaths") // Filter and adapt the paths for this bucket - val result = allMetricsPaths.flatMap { case (metricName, metricPath) => + val result = allMetricsPaths.flatMap { case (metricName, _) => allElasticAggregations.find(agg => agg.aggName == metricName || agg.field == metricName ) match { diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala index a9f4ff76..cfe03ba2 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala @@ -16,10 +16,11 @@ package app.softnetwork.elastic.sql.bridge -import app.softnetwork.elastic.sql.query.{Bucket, Criteria, Except, Field} +import app.softnetwork.elastic.sql.query.{Bucket, Criteria, Except, Field, FieldSort} import com.sksamuel.elastic4s.requests.searches.{SearchBodyBuilderFn, SearchRequest} case class ElasticSearchRequest( + sql: String, fields: Seq[Field], except: Option[Except], sources: Seq[String], @@ -28,7 +29,8 
@@ case class ElasticSearchRequest( offset: Option[Int], search: SearchRequest, buckets: Seq[Bucket] = Seq.empty, - aggregations: Seq[ElasticAggregation] = Seq.empty + having: Option[Criteria] = None, + sorts: Seq[FieldSort] = Seq.empty ) { def minScore(score: Option[Double]): ElasticSearchRequest = { score match { diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index 68e6c2b7..c1b31a50 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -33,7 +33,7 @@ import com.sksamuel.elastic4s.requests.common.FetchSourceContext import com.sksamuel.elastic4s.requests.script.Script import com.sksamuel.elastic4s.requests.script.ScriptType.Source import com.sksamuel.elastic4s.requests.searches.aggs.{ - Aggregation, + AbstractAggregation, FilterAggregation, NestedAggregation, TermsAggregation @@ -148,7 +148,7 @@ package object bridge { implicit def requestToRootAggregations( request: SQLSearchRequest, aggregations: Seq[ElasticAggregation] - ): Seq[Aggregation] = { + ): Seq[AbstractAggregation] = { val notNestedAggregations = aggregations.filterNot(_.nested) val notNestedBuckets = request.buckets.filterNot(_.nested) @@ -263,7 +263,7 @@ package object bridge { requestToNestedFilterAggregation(request, n.innerHitsName) // Build buckets for this nested aggregation - val buckets: Seq[Aggregation] = + val buckets: Seq[AbstractAggregation] = ElasticAggregation.buildBuckets( nestedBuckets, request.sorts -- directions.keys, @@ -379,7 +379,7 @@ package object bridge { } private def addNestedAggregationsToTermsAggregation( - agg: Aggregation, + agg: AbstractAggregation, nested: Seq[NestedAggregation] ): Option[TermsAggregation] = { agg match { @@ -403,24 +403,29 @@ package object bridge { implicit def requestToElasticSearchRequest(request: SQLSearchRequest): 
ElasticSearchRequest = ElasticSearchRequest( + request.sql, request.select.fields, request.select.except, request.sources, request.where.flatMap(_.criteria), request.limit.map(_.limit), - request.limit.flatMap(_.offset.map(_.offset)), + request.limit.flatMap(_.offset.map(_.offset)).orElse(Some(0)), request, request.buckets, - request.aggregates.map( - ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) - ) + request.having.flatMap(_.criteria), + request.orderBy.map(_.sorts).getOrElse(Seq.empty) ).minScore(request.score) implicit def requestToSearchRequest(request: SQLSearchRequest): SearchRequest = { import request._ val aggregations = request.aggregates.map( - ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) + ElasticAggregation( + _, + request.having.flatMap(_.criteria), + request.sorts, + request.sqlAggregations + ) ) val rootAggregations = requestToRootAggregations(request, aggregations) @@ -990,7 +995,7 @@ package object bridge { case Left(l) => val filteredAgg: Option[FilterAggregation] = requestToFilterAggregation(l) l.aggregates - .map(ElasticAggregation(_, l.having.flatMap(_.criteria), l.sorts)) + .map(ElasticAggregation(_, l.having.flatMap(_.criteria), l.sorts, l.sqlAggregations)) .map(aggregation => { val queryFiltered = l.where diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index b2bc13bb..4c3d3f6a 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -28,6 +28,7 @@ import app.softnetwork.elastic.sql.query.{ MetricSelectorScript, NestedElement, NestedElements, + SQLAggregation, SortOrder } import app.softnetwork.elastic.sql.function._ @@ -36,6 +37,7 @@ import app.softnetwork.elastic.sql.function.time.DateTrunc import 
app.softnetwork.elastic.sql.time.TimeUnit import com.sksamuel.elastic4s.ElasticApi.{ avgAgg, + bucketScriptAggregation, bucketSelectorAggregation, cardinalityAgg, maxAgg, @@ -49,6 +51,7 @@ import com.sksamuel.elastic4s.ElasticApi.{ import com.sksamuel.elastic4s.script.Script import com.sksamuel.elastic4s.searches.DateHistogramInterval import com.sksamuel.elastic4s.searches.aggs.{ + AbstractAggregation, Aggregation, CardinalityAggregation, DateHistogramAggregation, @@ -74,7 +77,7 @@ case class ElasticAggregation( nestedAgg: Option[NestedAggregation] = None, filteredAgg: Option[FilterAggregation] = None, aggType: AggregateFunction, - agg: Aggregation, + agg: AbstractAggregation, direction: Option[SortOrder] = None, nestedElement: Option[NestedElement] = None ) { @@ -94,7 +97,8 @@ object ElasticAggregation { def apply( sqlAgg: Field, having: Option[Criteria], - bucketsDirection: Map[String, SortOrder] + bucketsDirection: Map[String, SortOrder], + allAggregations: Map[String, SQLAggregation] ): ElasticAggregation = { import sqlAgg._ val sourceField = identifier.path @@ -111,9 +115,14 @@ object ElasticAggregation { val distinct = identifier.distinct - val aggType = aggregateFunction.getOrElse( - throw new IllegalArgumentException("Aggregation function is required") - ) + val aggType = { + if (isBucketScript) { + BucketScriptAggregation(identifier) + } else + aggregateFunction.getOrElse( + throw new IllegalArgumentException("Aggregation function is required") + ) + } val aggName = { if (fieldAlias.isDefined) @@ -135,7 +144,8 @@ object ElasticAggregation { val (aggFuncs, transformFuncs) = FunctionUtils.aggregateAndTransformFunctions(identifier) - require(aggFuncs.size == 1, s"Multiple aggregate functions not supported: $aggFuncs") + if (!isBucketScript) + require(aggFuncs.size == 1, s"Multiple aggregate functions not supported: $aggFuncs") def aggWithFieldOrScript( buildField: (String, String) => Aggregation, @@ -171,9 +181,8 @@ object ElasticAggregation { case th: 
WindowFunction => val limit = { th match { - case _: LastValue => 1 -// case _: FirstValue => 1 - case _ => th.limit.map(_.limit).getOrElse(1) + case _: LastValue | _: FirstValue => Some(1) + case _ => th.limit.map(_.limit) } } val topHits = @@ -193,9 +202,9 @@ object ElasticAggregation { .groupBy(_.sourceField) .map(_._2.head) .map(f => f.sourceField -> Script(f.painless(None)).lang("painless")) - .toMap - ) - .size(limit) sortBy th.orderBy.sorts.map(sort => + .toMap, + size = limit + ) sortBy th.orderBy.sorts.map(sort => sort.order match { case Some(Desc) => th.window match { @@ -210,6 +219,24 @@ object ElasticAggregation { } ) topHits + case script: BucketScriptAggregation => + val params = allAggregations.get(aggName) match { + case Some(sqlAgg) => + sqlAgg.aggType match { + case bsa: BucketScriptAggregation => + extractMetricsPathForBucketScript(bsa, allAggregations.values.toSeq) + case _ => Map.empty + } + case None => Map.empty + } + val painless = script.identifier.painless(None) + bucketScriptAggregation( + aggName, + Script(s"$painless").lang("painless"), + params.toMap + ) + case _ => + throw new IllegalArgumentException(s"Unsupported aggregation type: $aggType") } val nestedElement = identifier.nestedElement @@ -273,7 +300,7 @@ object ElasticAggregation { def buildBuckets( buckets: Seq[Bucket], bucketsDirection: Map[String, SortOrder], - aggregations: Seq[Aggregation], + aggregations: Seq[AbstractAggregation], aggregationsDirection: Map[String, SortOrder], having: Option[Criteria], nested: Option[NestedElement], @@ -284,7 +311,7 @@ object ElasticAggregation { val currentBucketPath = bucket.identifier.path val aggScript = - if (bucket.shouldBeScripted) { + if (!bucket.isBucketScript && bucket.shouldBeScripted) { val context = PainlessContext() val painless = bucket.painless(Some(context)) Some(Script(s"$context$painless").lang("painless")) @@ -576,6 +603,54 @@ object ElasticAggregation { } } + def extractMetricsPathForBucketScript( + 
bucketScriptAggregation: BucketScriptAggregation, + allAggregations: Seq[SQLAggregation] + ): Map[String, String] = { + val currentBucketPath = + bucketScriptAggregation.identifier.nestedElement.map(_.bucketPath).getOrElse("") + // Extract ALL metrics paths + val allMetricsPaths = bucketScriptAggregation.params.keys + val result = + allMetricsPaths.flatMap { metricName => + allAggregations.find(agg => agg.aggName == metricName || agg.field == metricName) match { + case Some(sqlAgg) => + val metricBucketPath = sqlAgg.nestedElement + .map(_.bucketPath) + .getOrElse("") + if (metricBucketPath == currentBucketPath) { + // Metric of the same level + Some(metricName -> metricName) + } else if (isDirectChild(metricBucketPath, currentBucketPath)) { + // Metric of a direct child + // CHECK if it is a "global" metric (cardinality, etc.) or a bucket metric (avg, sum, etc.) + val isGlobalMetric = sqlAgg.isGlobalMetric + + if (isGlobalMetric) { + // Global metric: can be referenced from the parent + val childNestedName = sqlAgg.nestedElement + .map(_.innerHitsName) + .getOrElse("") + // println( + // s"[DEBUG extractMetricsPath] Direct child (global metric): $metricName -> $childNestedName>$metricName" + // ) + Some(metricName -> s"$childNestedName>$metricName") + } else { + // Bucket metric: cannot be referenced from the parent + // println( + // s"[DEBUG extractMetricsPath] Direct child (bucket metric): $metricName -> SKIP (bucket-level metric)" + // ) + None + } + } else { + None + } + case _ => None + } + } + result.toMap + } + /** Extracts the buckets_path for a given bucket */ def extractMetricsPathForBucket( @@ -593,7 +668,7 @@ object ElasticAggregation { // println(s"[DEBUG extractMetricsPath] allMetricsPaths = $allMetricsPaths") // Filter and adapt the paths for this bucket - val result = allMetricsPaths.flatMap { case (metricName, metricPath) => + val result = allMetricsPaths.flatMap { case (metricName, _) => allElasticAggregations.find(agg => agg.aggName == 
metricName || agg.field == metricName ) match { diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala index 3afcdc6e..ff2463e8 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala @@ -16,11 +16,12 @@ package app.softnetwork.elastic.sql.bridge -import app.softnetwork.elastic.sql.query.{Bucket, Criteria, Except, Field} +import app.softnetwork.elastic.sql.query.{Bucket, Criteria, Except, Field, FieldSort, Limit} import com.sksamuel.elastic4s.searches.SearchRequest import com.sksamuel.elastic4s.http.search.SearchBodyBuilderFn case class ElasticSearchRequest( + sql: String, fields: Seq[Field], except: Option[Except], sources: Seq[String], @@ -29,7 +30,8 @@ case class ElasticSearchRequest( offset: Option[Int], search: SearchRequest, buckets: Seq[Bucket] = Seq.empty, - aggregations: Seq[ElasticAggregation] = Seq.empty + having: Option[Criteria] = None, + sorts: Seq[FieldSort] = Seq.empty ) { def minScore(score: Option[Double]): ElasticSearchRequest = { score match { diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index c0a30662..9dd7b421 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -34,7 +34,7 @@ import com.sksamuel.elastic4s.http.search.SearchBodyBuilderFn import com.sksamuel.elastic4s.script.Script import com.sksamuel.elastic4s.script.ScriptType.Source import com.sksamuel.elastic4s.searches.aggs.{ - Aggregation, + AbstractAggregation, FilterAggregation, NestedAggregation, TermsAggregation @@ -144,7 +144,7 @@ package object bridge { implicit def 
requestToRootAggregations( request: SQLSearchRequest, aggregations: Seq[ElasticAggregation] - ): Seq[Aggregation] = { + ): Seq[AbstractAggregation] = { val notNestedAggregations = aggregations.filterNot(_.nested) val notNestedBuckets = request.buckets.filterNot(_.nested) @@ -259,7 +259,7 @@ package object bridge { requestToNestedFilterAggregation(request, n.innerHitsName) // Build buckets for this nested aggregation - val buckets: Seq[Aggregation] = + val buckets: Seq[AbstractAggregation] = ElasticAggregation.buildBuckets( nestedBuckets, request.sorts -- directions.keys, @@ -375,7 +375,7 @@ package object bridge { } private def addNestedAggregationsToTermsAggregation( - agg: Aggregation, + agg: AbstractAggregation, nested: Seq[NestedAggregation] ): Option[TermsAggregation] = { agg match { @@ -399,6 +399,7 @@ package object bridge { implicit def requestToElasticSearchRequest(request: SQLSearchRequest): ElasticSearchRequest = ElasticSearchRequest( + request.sql, request.select.fields, request.select.except, request.sources, @@ -407,17 +408,20 @@ package object bridge { request.limit.flatMap(_.offset.map(_.offset)).orElse(Some(0)), request, request.buckets, - request.aggregates.map( - ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) - ) - // request.orderBy.map(_.sorts).getOrElse(Seq.empty) + request.having.flatMap(_.criteria), + request.orderBy.map(_.sorts).getOrElse(Seq.empty) ).minScore(request.score) implicit def requestToSearchRequest(request: SQLSearchRequest): SearchRequest = { import request._ val aggregations = request.aggregates.map( - ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) + ElasticAggregation( + _, + request.having.flatMap(_.criteria), + request.sorts, + request.sqlAggregations + ) ) val rootAggregations = requestToRootAggregations(request, aggregations) @@ -988,7 +992,7 @@ package object bridge { case Left(l) => val filteredAgg: Option[FilterAggregation] = requestToFilterAggregation(l) l.aggregates 
- .map(ElasticAggregation(_, l.having.flatMap(_.criteria), l.sorts)) + .map(ElasticAggregation(_, l.having.flatMap(_.criteria), l.sorts, l.sqlAggregations)) .map(aggregation => { val queryFiltered = l.where diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index 1b1f0abf..c654e92d 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -25,6 +25,10 @@ package object aggregate { def multivalued: Boolean = false override def isAggregation: Boolean = true + + override def hasAggregation: Boolean = true + + def isBucketScript: Boolean = false } case object COUNT extends Expr("COUNT") with AggregateFunction @@ -55,6 +59,34 @@ package object aggregate { case object PARTITION_BY extends Expr("PARTITION BY") with TokenRegex + case class BucketScriptAggregation( + identifier: Identifier, + params: Map[String, String] = Map.empty + ) extends AggregateFunction + with FunctionWithIdentifier + with Updateable { + override def sql: String = identifier.sql + + override def hasAggregation: Boolean = true + + override def shouldBeScripted: Boolean = true + + override def isBucketScript: Boolean = true + + override def update(request: SQLSearchRequest): BucketScriptAggregation = { + val identifiers = FunctionUtils.aggregateIdentifiers(identifier) + val params = identifiers.flatMap { + case identifier: Identifier => + val name = identifier.metricName.getOrElse(identifier.aliasOrName) + Some(name -> request.fieldAliases.getOrElse(identifier.identifierName, name)) + case _ => None + }.toMap + this.copy(params = params) + } + + override def toString: String = "bucket_script" + } + sealed trait WindowFunction extends AggregateFunction with FunctionWithIdentifier diff --git 
a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala index 687c20d6..eb494083 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala @@ -57,6 +57,61 @@ package object function { aggregateAndTransformFunctions(chain)._2 } + def aggregateFunctions( + fun: Function, + acc: Seq[AggregateFunction] = Seq.empty + ): Seq[AggregateFunction] = { + fun match { + case fwi: FunctionWithIdentifier => aggregateFunctions(fwi.identifier, acc) + case fc: FunctionChain => + fc.functions.foldLeft(acc) { + case (innerAcc, af: AggregateFunction) => innerAcc :+ af + case (innerAcc, i: FunctionWithIdentifier) => + aggregateFunctions(i.identifier, innerAcc) + case (innerAcc, fc: FunctionChain) => aggregateFunctions(fc, innerAcc) + case (innerAcc, b: BinaryFunction[_, _, _]) => aggregateFunctions(b, innerAcc) + case (innerAcc, _) => innerAcc + } + case b: BinaryFunction[_, _, _] => + val leftAcc = b.left match { + case f: Function => aggregateFunctions(f, acc) + case _ => acc + } + b.right match { + case f: Function => aggregateFunctions(f, leftAcc) + case _ => leftAcc + } + case _ => acc + } + } + + def aggregateIdentifiers( + fun: Function, + acc: Seq[FunctionChain] = Seq.empty + ): Seq[FunctionChain] = { + fun match { + case fwi: FunctionWithIdentifier => aggregateIdentifiers(fwi.identifier, acc) + case fc: FunctionChain => + fc.functions.foldLeft(acc) { + case (innerAcc, _: AggregateFunction) => innerAcc :+ fc + case (innerAcc, i: FunctionWithIdentifier) => + aggregateIdentifiers(i.identifier, innerAcc) + case (innerAcc, fc: FunctionChain) => aggregateIdentifiers(fc, innerAcc) + case (innerAcc, b: BinaryFunction[_, _, _]) => aggregateIdentifiers(b, innerAcc) + case (innerAcc, _) => innerAcc + } + case b: BinaryFunction[_, _, _] => + val leftAcc = b.left match { + case f: Function => 
aggregateIdentifiers(f, acc) + case _ => acc + } + b.right match { + case f: Function => aggregateIdentifiers(f, leftAcc) + case _ => leftAcc + } + case _ => acc + } + } } trait FunctionChain extends Function { @@ -281,7 +336,9 @@ package object function { override def nullable: Boolean = left.nullable || right.nullable - override def shouldBeScripted: Boolean = left.shouldBeScripted || right.shouldBeScripted + override def hasAggregation: Boolean = left.hasAggregation || right.hasAggregation + + override def shouldBeScripted: Boolean = true } trait TransformFunction[In <: SQLType, Out <: SQLType] extends FunctionN[In, Out] { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala index d97f4bc3..24e199c9 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala @@ -129,7 +129,4 @@ case class ArithmeticExpression( expr } - override def hasAggregation: Boolean = left.hasAggregation || right.hasAggregation - - override def shouldBeScripted: Boolean = true } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala index 710d4af5..f3023da4 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala @@ -100,6 +100,10 @@ case class Bucket( override def shouldBeScripted: Boolean = identifier.shouldBeScripted + override def hasAggregation: Boolean = identifier.hasAggregation + + def isBucketScript: Boolean = !identifier.isAggregation && hasAggregation + /** Generate painless script for this token * * @param context diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala 
b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala index b711a54f..77a2d4f5 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala @@ -122,7 +122,7 @@ case class SQLSearchRequest( } lazy val fields: Seq[String] = { - if (aggregates.isEmpty && buckets.isEmpty && bucketScripts.isEmpty) + if (aggregates.isEmpty && buckets.isEmpty) select.fields .filterNot(_.isScriptField) .filterNot(_.nested) @@ -138,7 +138,9 @@ case class SQLSearchRequest( lazy val windowFunctions: Seq[WindowFunction] = windowFields.flatMap(_.windows) lazy val aggregates: Seq[Field] = - select.fields.filter(_.isAggregation).filterNot(_.windows.isDefined) ++ windowFields + select.fields + .filter(f => f.isAggregation || f.isBucketScript) + .filterNot(_.windows.isDefined) ++ windowFields lazy val sqlAggregations: Map[String, SQLAggregation] = aggregates.flatMap(f => SQLAggregation.fromField(f, this)).map(a => a.aggName -> a).toMap @@ -201,5 +203,4 @@ case class SQLSearchRequest( } yield () } - lazy val bucketScripts: Seq[Field] = select.fields.filter(_.isBucketScript) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index f30944fa..7dbdabee 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -16,7 +16,12 @@ package app.softnetwork.elastic.sql.query -import app.softnetwork.elastic.sql.function.aggregate.{AggregateFunction, WindowFunction} +import app.softnetwork.elastic.sql.function.aggregate.{ + AggregateFunction, + BucketScriptAggregation, + COUNT, + WindowFunction +} import app.softnetwork.elastic.sql.function.{Function, FunctionChain, FunctionUtils} import app.softnetwork.elastic.sql.{ asString, @@ -97,7 +102,7 @@ case class Field( lazy val path: String = 
identifier.path - def isBucketScript: Boolean = functions.nonEmpty && !isAggregation && hasAggregation + def isBucketScript: Boolean = !isAggregation && hasAggregation } case object Except extends Expr("except") with TokenRegex @@ -143,70 +148,84 @@ case class SQLAggregation( ) { val nested: Boolean = nestedElement.nonEmpty val multivalued: Boolean = aggType.multivalued + val isGlobalMetric: Boolean = + distinct && (aggType match { + case COUNT => true + case _ => false + }) } object SQLAggregation { def fromField(field: Field, request: SQLSearchRequest): Option[SQLAggregation] = { - field.aggregateFunction.map { aggType => - import field._ - val sourceField = identifier.path + import field._ + + val aggType = aggregateFunction match { + case Some(agg) => agg + case None if field.isBucketScript => + BucketScriptAggregation(identifier).update(request) + case _ => return None + } - val direction = request.sorts.get(identifier.identifierName) + val sourceField = identifier.path - val _field = fieldAlias match { - case Some(alias) => alias.alias - case _ => sourceField - } + val direction = request.sorts.get(identifier.identifierName) - val distinct = identifier.distinct + val _field = fieldAlias match { + case Some(alias) => alias.alias + case _ => sourceField + } - val aggName = { - if (fieldAlias.isDefined) - _field - else if (distinct) - s"${aggType}_distinct_${sourceField.replace(".", "_")}" - else { - aggType match { - case th: WindowFunction => - s"${th.window.sql.toLowerCase}_${sourceField.replace(".", "_")}" - case _ => - s"${aggType}_${sourceField.replace(".", "_")}" + val distinct = identifier.distinct + + val aggName = { + if (fieldAlias.isDefined) + _field + else if (distinct) + s"${aggType}_distinct_${sourceField.replace(".", "_")}" + else { + aggType match { + case th: WindowFunction => + s"${th.window.sql.toLowerCase}_${sourceField.replace(".", "_")}" + case _ => + s"${aggType}_${sourceField.replace(".", "_")}" - } } } + } - var aggPath = Seq[String]() 
+ var aggPath = Seq[String]() - val (aggFuncs, _) = FunctionUtils.aggregateAndTransformFunctions(identifier) + val (aggFuncs, _) = FunctionUtils.aggregateAndTransformFunctions(identifier) + if (!isBucketScript) require(aggFuncs.size == 1, s"Multiple aggregate functions not supported: $aggFuncs") - val nestedElement = identifier.nestedElement - - val nestedElements: Seq[NestedElement] = - nestedElement.map(n => NestedElements.buildNestedTrees(Seq(n))).getOrElse(Nil) - - nestedElements match { - case Nil => - aggPath ++= Seq(aggName) - case nestedElements => - def buildNested(n: NestedElement): Unit = { - aggPath ++= Seq(n.innerHitsName) - val children = n.children - if (children.nonEmpty) { - children.map(buildNested) - } - } - val root = nestedElements.head - buildNested(root) - request.having match { - case Some(_) => aggPath ++= Seq("filtered_agg") - case _ => + val nestedElement = identifier.nestedElement + + val nestedElements: Seq[NestedElement] = + nestedElement.map(n => NestedElements.buildNestedTrees(Seq(n))).getOrElse(Nil) + + nestedElements match { + case Nil => + aggPath ++= Seq(aggName) + case nestedElements => + def buildNested(n: NestedElement): Unit = { + aggPath ++= Seq(n.innerHitsName) + val children = n.children + if (children.nonEmpty) { + children.map(buildNested) } - aggPath ++= Seq(aggName) - } + } + val root = nestedElements.head + buildNested(root) + request.having match { + case Some(_) => aggPath ++= Seq("filtered_agg") + case _ => + } + aggPath ++= Seq(aggName) + } + Some( SQLAggregation( aggPath.mkString("."), _field, @@ -214,9 +233,9 @@ object SQLAggregation { distinct = distinct, aggType = aggType, direction = direction, - nestedElement = field.identifier.nestedElement, + nestedElement = identifier.nestedElement, buckets = request.buckets.map { _.name } ) - } + ) } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala index 41af2cfb..a983dfa6 
100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala @@ -354,8 +354,13 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { } override lazy val isAggregation: Boolean = maybeValue match { - case Some(v: FunctionChain) => identifier.isAggregation || v.isAggregation - case _ => identifier.isAggregation + case Some(v) => identifier.isAggregation || v.isAggregation + case _ => identifier.isAggregation + } + + override lazy val hasAggregation: Boolean = maybeValue match { + case Some(v) => identifier.hasAggregation || v.hasAggregation + case _ => identifier.hasAggregation } def hasBucket: Boolean = identifier.hasBucket || maybeValue.exists { From 393cc62840f82e1daaa78a07885ccff47cda9e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Thu, 27 Nov 2025 10:59:08 +0100 Subject: [PATCH 17/40] upgrade testContainers --- build.sbt | 2 +- es6/testkit/build.sbt | 2 +- project/Versions.scala | 2 +- testkit/build.sbt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build.sbt b/build.sbt index cdf10c17..862d0c72 100644 --- a/build.sbt +++ b/build.sbt @@ -207,7 +207,7 @@ def testkitProject(esVersion: String, ss: Def.SettingsDefinition*): Project = { // "org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j, "org.apache.logging.log4j" % "log4j-core" % Versions.log4j, "app.softnetwork.persistence" %% "persistence-core-testkit" % Versions.genericPersistence, - "org.testcontainers" % "elasticsearch" % Versions.testContainers excludeAll (jacksonExclusions: _*) + "org.testcontainers" % "testcontainers-elasticsearch" % Versions.testContainers excludeAll (jacksonExclusions: _*) ), Compile / compile := (Compile / compile).dependsOn(copyTestkit(esVersion)).value ) diff --git a/es6/testkit/build.sbt b/es6/testkit/build.sbt index 2a7976ac..e9080bdf 100644 --- a/es6/testkit/build.sbt +++ b/es6/testkit/build.sbt @@ 
-10,5 +10,5 @@ elastic4sTestkitDependencies(elasticSearchVersion.value) ++ Seq( // "org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j, "org.apache.logging.log4j" % "log4j-core" % Versions.log4j, "app.softnetwork.persistence" %% "persistence-core-testkit" % Versions.genericPersistence, - "org.testcontainers" % "elasticsearch" % Versions.testContainers excludeAll (jacksonExclusions *) + "org.testcontainers" % "testcontainers-elasticsearch" % Versions.testContainers excludeAll (jacksonExclusions *) ) diff --git a/project/Versions.scala b/project/Versions.scala index e2ee035c..abb721ad 100644 --- a/project/Versions.scala +++ b/project/Versions.scala @@ -42,7 +42,7 @@ object Versions { val log4j = "2.8.2" - val testContainers = "1.18.0" + val testContainers = "2.0.2" val genericPersistence = "0.8.0" diff --git a/testkit/build.sbt b/testkit/build.sbt index dfd176b2..61f75182 100644 --- a/testkit/build.sbt +++ b/testkit/build.sbt @@ -11,5 +11,5 @@ libraryDependencies ++= elasticClientDependencies(elasticSearchVersion.value) ++ // "org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j, "org.apache.logging.log4j" % "log4j-core" % Versions.log4j, "app.softnetwork.persistence" %% "persistence-core-testkit" % Versions.genericPersistence, - "org.testcontainers" % "elasticsearch" % Versions.testContainers excludeAll (jacksonExclusions: _*) + "org.testcontainers" % "testcontainers-elasticsearch" % Versions.testContainers excludeAll (jacksonExclusions: _*) ) From e73b7d9906bdfdaad87080abec95b0b2e54d5c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Thu, 27 Nov 2025 11:02:45 +0100 Subject: [PATCH 18/40] update elastic conversion in order to load aggs top hits --- .../elastic/client/ElasticConversion.scala | 197 +++++++++++------- 1 file changed, 127 insertions(+), 70 deletions(-) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala 
b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala index 785882a9..7709f2ca 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala @@ -155,13 +155,17 @@ trait ElasticConversion { parseAggregations(aggs, Map.empty, fieldAliases, aggregations) case (Some(hits), Some(aggs)) if hits.nonEmpty => - // Case 4 : Hits + global aggregations + // Case 4 : Hits + global aggregations + top_hits aggregations val globalMetrics = extractGlobalMetrics(aggs) + val allTopHits = extractAggregationValues( + extractAllTopHits(aggs, fieldAliases, aggregations), + aggregations + ) hits.map { hit => val source = extractSource(hit, fieldAliases) val metadata = extractHitMetadata(hit) val innerHits = extractInnerHits(hit, fieldAliases) - globalMetrics ++ source ++ metadata ++ innerHits + globalMetrics ++ allTopHits ++ source ++ metadata ++ innerHits } case _ => @@ -352,61 +356,10 @@ trait ElasticConversion { } else if (bucketAggs.isEmpty) { // No buckets : it is a leaf aggregation (metrics or top_hits) val metrics = extractMetrics(aggsNode) - val allTopHits = extractAllTopHits(aggsNode) + val allTopHits = extractAllTopHits(aggsNode, fieldAliases, aggregations) if (allTopHits.nonEmpty) { - // Process each top_hits aggregation with their names - val topHitsData = allTopHits.map { case (topHitName, hits) => - // Determine if it is a multivalued aggregation (array_agg, ...) 
- val hasMultipleValues = aggregations.get(topHitName) match { - case Some(agg) => agg.multivalued - case None => - // Fallback on naming convention if aggregation is not found - !topHitName.toLowerCase.matches("(first|last)_.*") - } - - val processedHits = hits.map { hit => - val source = extractSource(hit, fieldAliases) - if (hasMultipleValues) { - source.size match { - case 0 => null - case 1 => - // If only one field in source and multivalued, return the value directly - val value = source.head._2 - value match { - case list: List[_] => list - case map: Map[_, _] => map - case other => other - } - case _ => - // Multiple fields: return as object - val metadata = extractHitMetadata(hit) - val innerHits = extractInnerHits(hit, fieldAliases) - source ++ metadata ++ innerHits - } - } else { - val metadata = extractHitMetadata(hit) - val innerHits = extractInnerHits(hit, fieldAliases) - source ++ metadata ++ innerHits - } - } - - // If multipleValues = true OR more than one hit, return a list - // If multipleValues = false AND only one hit, return an object - topHitName -> { - if (!hasMultipleValues && processedHits.size == 1) - processedHits.head - else { - if (aggregations.get(topHitName).exists(_.distinct)) - processedHits.distinct - else - processedHits - } - } - } - - Seq(parentContext ++ metrics ++ topHitsData) - + Seq(parentContext ++ metrics ++ allTopHits) } else if (metrics.nonEmpty || parentContext.nonEmpty) { Seq(parentContext ++ metrics) } else { @@ -414,7 +367,7 @@ trait ElasticConversion { } } else { // Handle each aggregation with buckets - bucketAggs.flatMap { case (aggName, buckets, aggValue) => + bucketAggs.flatMap { case (aggName, buckets, _) => buckets.flatMap { bucket => val bucketKey = extractBucketKey(bucket) val docCount = Option(bucket.get("doc_count")) @@ -572,23 +525,80 @@ trait ElasticConversion { } /** Extract all top_hits aggregations with their names and hits */ - def extractAllTopHits(aggsNode: JsonNode): Map[String, Seq[JsonNode]] = 
{ + def extractAllTopHits( + aggsNode: JsonNode, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] + ): Map[String, Any] = { if (!aggsNode.isObject) return Map.empty - aggsNode - .properties() - .asScala - .collect { - case entry if entry.getValue.has("hits") => - val normalizedKey = normalizeAggregationKey(entry.getKey) - val hitsNode = entry.getValue.path("hits").path("hits") - val hits = if (hitsNode.isArray) { - hitsNode.elements().asScala.toSeq - } else { - Seq.empty + val allTopHits = + aggsNode + .properties() + .asScala + .collect { + case entry if entry.getValue.has("hits") => + val normalizedKey = normalizeAggregationKey(entry.getKey) + val hitsNode = entry.getValue.path("hits").path("hits") + val hits = if (hitsNode.isArray) { + hitsNode.elements().asScala.toSeq + } else { + Seq.empty + } + normalizedKey -> hits + } + .toMap + + // Process each top_hits aggregation with their names + val row = allTopHits.map { case (topHitName, hits) => + // Determine if it is a multivalued aggregation (array_agg, ...) 
+ val hasMultipleValues = aggregations.get(topHitName) match { + case Some(agg) => agg.multivalued + case None => + // Fallback on naming convention if aggregation is not found + !topHitName.toLowerCase.matches("(first|last)_.*") + } + + val processedHits = hits.map { hit => + val source = extractSource(hit, fieldAliases) + if (hasMultipleValues) { + source.size match { + case 0 => null + case 1 => + // If only one field in source and multivalued, return the value directly + val value = source.head._2 + value match { + case list: List[_] => list + case map: Map[_, _] => map + case other => other + } + case _ => + // Multiple fields: return as object + val metadata = extractHitMetadata(hit) + val innerHits = extractInnerHits(hit, fieldAliases) + source ++ metadata ++ innerHits } - normalizedKey -> hits + } else { + val metadata = extractHitMetadata(hit) + val innerHits = extractInnerHits(hit, fieldAliases) + source ++ metadata ++ innerHits + } } - .toMap + + // If multipleValues = true OR more than one hit, return a list + // If multipleValues = false AND only one hit, return an object + topHitName -> { + if (!hasMultipleValues && processedHits.size == 1) + processedHits.head + else { + if (aggregations.get(topHitName).exists(_.distinct)) + processedHits.distinct + else + processedHits + } + } + } + + row } /** Extract global metrics from aggregations (for hits + aggs case) @@ -622,6 +632,53 @@ trait ElasticConversion { .toMap } + def extractAggregationValues( + row: Map[String, Any], + aggregations: Map[String, ClientAggregation] + ): Map[String, Any] = { + val values = aggregations + .map { wf => + val fieldName = wf._1 + + val aggType = wf._2.aggType + + val sourceField = wf._2.sourceField + + // Get value from row (already processed by ElasticConversion) + val value = row.get(fieldName).orElse { + None + } + + val validatedValue = + value match { + case Some(m: Map[String, Any]) => + m.get(sourceField) match { + case Some(v) => + aggType match { + case 
AggregationType.ArrayAgg => + v match { + case l: List[_] => + Some(l) + case other => + Some(List(other)) // Wrap into a List + } + case _ => Some(v) + } + case None => + None + } + case other => + other + } + + fieldName -> validatedValue + } + .collect { case (name, Some(value)) => + name -> value + } + values + } + /** Convert recursively a JsonNode to Map */ def jsonNodeToMap(node: JsonNode, fieldAliases: Map[String, String]): Map[String, Any] = { From 6555fefec67808a041058ee6f4a474f00cad5f98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Thu, 27 Nov 2025 11:04:58 +0100 Subject: [PATCH 19/40] rename bucketPath to nestedPath --- .../sql/bridge/ElasticAggregation.scala | 30 +++++++++---------- .../sql/bridge/ElasticAggregation.scala | 30 +++++++++---------- .../softnetwork/elastic/sql/query/From.scala | 4 +-- .../elastic/sql/query/GroupBy.scala | 4 +-- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 0948f543..a0c9f63f 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -308,7 +308,7 @@ object ElasticAggregation { ): Option[Aggregation] = { buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => // Determine the bucketPath of the current bucket - val currentBucketPath = bucket.identifier.path + val currentNestedPath = bucket.identifier.path val aggScript = if (!bucket.isBucketScript && bucket.shouldBeScripted) { @@ -366,7 +366,7 @@ object ElasticAggregation { bucketsDirection.get(bucket.identifier.identifierName) match { case Some(direction) => DateHistogramAggregation(bucket.name, calendarInterval = interval) - .field(currentBucketPath) + .field(currentNestedPath) .minDocCount(1) .order(direction match 
{ case Asc => HistogramOrder("_key", asc = true) @@ -374,7 +374,7 @@ object ElasticAggregation { }) case _ => DateHistogramAggregation(bucket.name, calendarInterval = interval) - .field(currentBucketPath) + .field(currentNestedPath) .minDocCount(1) } } @@ -401,14 +401,14 @@ object ElasticAggregation { // Standard terms aggregation bucketsDirection.get(bucket.identifier.identifierName) match { case Some(direction) => - termsAgg(bucket.name, currentBucketPath) + termsAgg(bucket.name, currentNestedPath) .minDocCount(1) .order(Seq(direction match { case Asc => TermsOrder("_key", asc = true) case _ => TermsOrder("_key", asc = false) })) case _ => - termsAgg(bucket.name, currentBucketPath) + termsAgg(bucket.name, currentNestedPath) .minDocCount(1) } } @@ -504,7 +504,7 @@ object ElasticAggregation { allElasticAggregations: Seq[ElasticAggregation] ): String = { - val currentBucketPath = nested.map(_.bucketPath).getOrElse("") + val currentNestedPath = nested.map(_.nestedPath).getOrElse("") // No filtering val fullScript = MetricSelectorScript @@ -514,7 +514,7 @@ object ElasticAggregation { .replaceAll("1 == 1", "") .trim - // println(s"[DEBUG] currentBucketPath = $currentBucketPath") + // println(s"[DEBUG] currentNestedPath = $currentNestedPath") // println(s"[DEBUG] fullScript (complete) = $fullScript") if (fullScript.isEmpty) { @@ -536,17 +536,17 @@ object ElasticAggregation { ) match { case Some(elasticAgg) => val metricBucketPath = elasticAgg.nestedElement - .map(_.bucketPath) + .map(_.nestedPath) .getOrElse("") // println( // s"[DEBUG] metricName = $metricName, metricBucketPath = $metricBucketPath, aggType = ${elasticAgg.agg.getClass.getSimpleName}" // ) - val belongsToLevel = metricBucketPath == currentBucketPath + val belongsToLevel = metricBucketPath == currentNestedPath val isDirectChildAndAccessible = - if (isDirectChild(metricBucketPath, currentBucketPath)) { + if (isDirectChild(metricBucketPath, currentNestedPath)) { // Check if it's a "global" metric 
(cardinality, etc.) elasticAgg.isGlobalMetric } else { @@ -562,7 +562,7 @@ object ElasticAggregation { case None => // println(s"[DEBUG] metricName = $metricName NOT FOUND") - currentBucketPath.isEmpty + currentNestedPath.isEmpty } } } @@ -608,7 +608,7 @@ object ElasticAggregation { allAggregations: Seq[SQLAggregation] ): Map[String, String] = { val currentBucketPath = - bucketScriptAggregation.identifier.nestedElement.map(_.bucketPath).getOrElse("") + bucketScriptAggregation.identifier.nestedElement.map(_.nestedPath).getOrElse("") // Extract ALL metrics paths val allMetricsPaths = bucketScriptAggregation.params.keys val result = @@ -616,7 +616,7 @@ object ElasticAggregation { allAggregations.find(agg => agg.aggName == metricName || agg.field == metricName) match { case Some(sqlAgg) => val metricBucketPath = sqlAgg.nestedElement - .map(_.bucketPath) + .map(_.nestedPath) .getOrElse("") if (metricBucketPath == currentBucketPath) { // Metric of the same level @@ -659,7 +659,7 @@ object ElasticAggregation { allElasticAggregations: Seq[ElasticAggregation] ): Map[String, String] = { - val currentBucketPath = nested.map(_.bucketPath).getOrElse("") + val currentBucketPath = nested.map(_.nestedPath).getOrElse("") // Extract ALL metrics paths val allMetricsPaths = criteria.extractAllMetricsPath @@ -674,7 +674,7 @@ object ElasticAggregation { ) match { case Some(elasticAgg) => val metricBucketPath = elasticAgg.nestedElement - .map(_.bucketPath) + .map(_.nestedPath) .getOrElse("") // println( diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 4c3d3f6a..cb1a390e 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -308,7 +308,7 @@ object ElasticAggregation { ): Option[Aggregation] = { 
buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => // Determine the bucketPath of the current bucket - val currentBucketPath = bucket.identifier.path + val currentNestedPath = bucket.identifier.path val aggScript = if (!bucket.isBucketScript && bucket.shouldBeScripted) { @@ -366,7 +366,7 @@ object ElasticAggregation { bucketsDirection.get(bucket.identifier.identifierName) match { case Some(direction) => DateHistogramAggregation(bucket.name, interval = interval) - .field(currentBucketPath) + .field(currentNestedPath) .minDocCount(1) .order(direction match { case Asc => HistogramOrder("_key", asc = true) @@ -374,7 +374,7 @@ object ElasticAggregation { }) case _ => DateHistogramAggregation(bucket.name, interval = interval) - .field(currentBucketPath) + .field(currentNestedPath) .minDocCount(1) } } @@ -401,14 +401,14 @@ object ElasticAggregation { // Standard terms aggregation bucketsDirection.get(bucket.identifier.identifierName) match { case Some(direction) => - termsAgg(bucket.name, currentBucketPath) + termsAgg(bucket.name, currentNestedPath) .minDocCount(1) .order(Seq(direction match { case Asc => TermsOrder("_key", asc = true) case _ => TermsOrder("_key", asc = false) })) case _ => - termsAgg(bucket.name, currentBucketPath) + termsAgg(bucket.name, currentNestedPath) .minDocCount(1) } } @@ -504,7 +504,7 @@ object ElasticAggregation { allElasticAggregations: Seq[ElasticAggregation] ): String = { - val currentBucketPath = nested.map(_.bucketPath).getOrElse("") + val currentNestedPath = nested.map(_.nestedPath).getOrElse("") // No filtering val fullScript = MetricSelectorScript @@ -514,7 +514,7 @@ object ElasticAggregation { .replaceAll("1 == 1", "") .trim - // println(s"[DEBUG] currentBucketPath = $currentBucketPath") + // println(s"[DEBUG] currentNestedPath = $currentNestedPath") // println(s"[DEBUG] fullScript (complete) = $fullScript") if (fullScript.isEmpty) { @@ -536,17 +536,17 @@ object ElasticAggregation { ) match { case 
Some(elasticAgg) => val metricBucketPath = elasticAgg.nestedElement - .map(_.bucketPath) + .map(_.nestedPath) .getOrElse("") // println( // s"[DEBUG] metricName = $metricName, metricBucketPath = $metricBucketPath, aggType = ${elasticAgg.agg.getClass.getSimpleName}" // ) - val belongsToLevel = metricBucketPath == currentBucketPath + val belongsToLevel = metricBucketPath == currentNestedPath val isDirectChildAndAccessible = - if (isDirectChild(metricBucketPath, currentBucketPath)) { + if (isDirectChild(metricBucketPath, currentNestedPath)) { // Check if it's a "global" metric (cardinality, etc.) elasticAgg.isGlobalMetric } else { @@ -562,7 +562,7 @@ object ElasticAggregation { case None => // println(s"[DEBUG] metricName = $metricName NOT FOUND") - currentBucketPath.isEmpty + currentNestedPath.isEmpty } } } @@ -608,7 +608,7 @@ object ElasticAggregation { allAggregations: Seq[SQLAggregation] ): Map[String, String] = { val currentBucketPath = - bucketScriptAggregation.identifier.nestedElement.map(_.bucketPath).getOrElse("") + bucketScriptAggregation.identifier.nestedElement.map(_.nestedPath).getOrElse("") // Extract ALL metrics paths val allMetricsPaths = bucketScriptAggregation.params.keys val result = @@ -616,7 +616,7 @@ object ElasticAggregation { allAggregations.find(agg => agg.aggName == metricName || agg.field == metricName) match { case Some(sqlAgg) => val metricBucketPath = sqlAgg.nestedElement - .map(_.bucketPath) + .map(_.nestedPath) .getOrElse("") if (metricBucketPath == currentBucketPath) { // Metric of the same level @@ -659,7 +659,7 @@ object ElasticAggregation { allElasticAggregations: Seq[ElasticAggregation] ): Map[String, String] = { - val currentBucketPath = nested.map(_.bucketPath).getOrElse("") + val currentBucketPath = nested.map(_.nestedPath).getOrElse("") // Extract ALL metrics paths val allMetricsPaths = criteria.extractAllMetricsPath @@ -674,7 +674,7 @@ object ElasticAggregation { ) match { case Some(elasticAgg) => val metricBucketPath = 
elasticAgg.nestedElement - .map(_.bucketPath) + .map(_.nestedPath) .getOrElse("") // println( diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala index 25df23ac..94edf995 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala @@ -218,9 +218,9 @@ case class NestedElement( } } - lazy val bucketPath: String = { + lazy val nestedPath: String = { parent match { - case Some(p) => s"${p.bucketPath}>$innerHitsName" + case Some(p) => s"${p.nestedPath}>$innerHitsName" case None => innerHitsName } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala index f3023da4..4f94e548 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala @@ -89,9 +89,9 @@ case class Bucket( lazy val name: String = identifier.fieldAlias.getOrElse(sourceBucket.replace(".", "_")) - lazy val bucketPath: String = { + lazy val nestedPath: String = { identifier.nestedElement match { - case Some(ne) => ne.bucketPath + case Some(ne) => ne.nestedPath case None => "" // Root level } } From 081027a19515f0f9811a81778803ea17855618fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Thu, 27 Nov 2025 11:16:24 +0100 Subject: [PATCH 20/40] add flag indicating whether or not an aggregate is a window function with partitioning --- .../elastic/client/ScrollApi.scala | 7 +-- .../elastic/client/SearchApi.scala | 57 ++----------------- .../softnetwork/elastic/client/package.scala | 8 ++- .../client/ElasticConversionSpec.scala | 4 +- .../sql/function/aggregate/package.scala | 22 +++++-- .../elastic/sql/query/SQLSearchRequest.scala | 4 +- .../elastic/sql/query/Select.scala | 2 +- 7 files changed, 32 insertions(+), 72 deletions(-) 
diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala index 5ea8bee5..4489c6c7 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala @@ -122,12 +122,7 @@ trait ScrollApi extends ElasticClientHelpers { )(implicit system: ActorSystem): Source[(Map[String, Any], ScrollMetrics), NotUsed] = { sql.request match { case Some(Left(single)) => - if ( - single.windowFunctions.nonEmpty && (single.fields.nonEmpty || single.windowFunctions - .flatMap(_.fields) - .distinct - .size > 1) - ) + if (single.windowFunctions.nonEmpty) return scrollWithWindowEnrichment(sql, single, config) val sqlRequest = single.copy(score = sql.score) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala index 79bc30b3..b5d0dcc5 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -68,12 +68,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { collection.immutable.Seq(single.sources: _*), sql = Some(sql.query) ) - if ( - single.windowFunctions.nonEmpty && (single.fields.nonEmpty || single.windowFunctions - .flatMap(_.fields) - .distinct - .size > 1) - ) + if (single.windowBuckets.nonEmpty) searchWithWindowEnrichment(sql, single) else singleSearch(elasticQuery, single.fieldAliases, single.sqlAggregations) @@ -829,7 +824,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { s"✅ Successfully executed search with inner hits in indices '${elasticQuery.indices.mkString(",")}'" ) ElasticResult.attempt { - new JsonParser().parse(response).getAsJsonObject + JsonParser.parseString(response).getAsJsonObject } match { case ElasticFailure(error) => logger.error( @@ -918,7 +913,7 @@ trait 
SearchApi extends ElasticConversion with ElasticClientHelpers { s"✅ Successfully executed multi-search inner hits with ${elasticQueries.queries.size} queries" ) ElasticResult.attempt { - new JsonParser().parse(response).getAsJsonObject + JsonParser.parseString(response).getAsJsonObject } match { case ElasticFailure(error) => logger.error( @@ -1287,51 +1282,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { aggregations: Map[String, ClientAggregation] ): WindowValues = { - val values = aggregations - .filter(_._2.window) - .map { wf => - val fieldName = wf._1 - - val aggType = wf._2.aggType - - val sourceField = wf._2.sourceField - - // Get value from row (already processed by ElasticConversion) - val value = row.get(fieldName).orElse { - logger.warn(s"⚠️ Window function '$fieldName' not found in aggregation result") - None - } - - val validatedValue = - value match { - case Some(m: Map[String, Any]) => - m.get(sourceField) match { - case Some(v) => - aggType match { - case AggregationType.ArrayAgg => - v match { - case l: List[_] => - Some(l) - case other => - logger.warn( - s"⚠️ Expected List for ARRAY_AGG '$fieldName', got ${other.getClass.getSimpleName}" - ) - Some(List(other)) // Wrap into a List - } - case _ => Some(v) - } - case None => - None - } - case other => - other - } - - fieldName -> validatedValue - } - .collect { case (name, Some(value)) => - name -> value - } + val values = extractAggregationValues(row, aggregations) WindowValues(values) } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index 4eb2d5ef..5e56c5f0 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -137,13 +137,17 @@ package object client extends SerializationApi { * @param distinct * - when the aggregation is multivalued define if its values should be returned 
distinct or * not + * @param sourceField + * - the source field of the aggregation + * @param windowing + * - whether the aggregation is a window function with partitioning */ case class ClientAggregation( aggName: String, aggType: AggregationType.AggregationType, distinct: Boolean, sourceField: String, - window: Boolean + windowing: Boolean ) { def multivalued: Boolean = aggType == AggregationType.ArrayAgg def singleValued: Boolean = !multivalued @@ -166,7 +170,7 @@ package object client extends SerializationApi { aggType, agg.distinct, agg.sourceField, - agg.aggType.isInstanceOf[WindowFunction] + agg.aggType.isWindowing ) } } diff --git a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala index 620a403c..ac977bf8 100644 --- a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala +++ b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala @@ -190,7 +190,7 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver aggType = AggregationType.ArrayAgg, distinct = false, "name", - window = true + windowing = true ) ) ) match { @@ -643,7 +643,7 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver aggType = AggregationType.ArrayAgg, distinct = false, "name", - window = true + windowing = true ) ) ) match { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index c654e92d..0d59e1da 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -29,6 +29,10 @@ package object aggregate { override def hasAggregation: Boolean = true def isBucketScript: Boolean = false + + /** Indicates whether this aggregation is a windowing 
function with partitioning or not + */ + def isWindowing: Boolean = false } case object COUNT extends Expr("COUNT") with AggregateFunction @@ -97,6 +101,8 @@ package object aggregate { def window: Window def limit: Option[Limit] + override def isWindowing: Boolean = buckets.nonEmpty + lazy val buckets: Seq[Bucket] = partitionBy.map(identifier => Bucket(identifier, None)) lazy val bucketNames: Map[String, Bucket] = buckets.map { b => @@ -120,12 +126,16 @@ package object aggregate { val updated = this .withPartitionBy(partitionBy = partitionBy.map(_.update(request))) updated.withFields( - fields = request.select.fields - .filterNot(field => - field.isAggregation || request.bucketNames.keys.toSeq - .contains(field.identifier.identifierName) - ) - .filterNot(f => request.excludes.contains(f.sourceField)) + fields = if (isWindowing) { + request.select.fields + .filterNot(field => + field.isAggregation || request.bucketNames.keys.toSeq + .contains(field.identifier.identifierName) + ) + .filterNot(f => request.excludes.contains(f.sourceField)) + } else { + updated.fields + } ) } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala index 77a2d4f5..4e925ac6 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala @@ -122,7 +122,7 @@ case class SQLSearchRequest( } lazy val fields: Seq[String] = { - if (aggregates.isEmpty && buckets.isEmpty) + if (buckets.isEmpty) select.fields .filterNot(_.isScriptField) .filterNot(_.nested) @@ -140,7 +140,7 @@ case class SQLSearchRequest( lazy val aggregates: Seq[Field] = select.fields .filter(f => f.isAggregation || f.isBucketScript) - .filterNot(_.windows.isDefined) ++ windowFields + .filterNot(_.isWindow) ++ windowFields lazy val sqlAggregations: Map[String, SQLAggregation] = aggregates.flatMap(f => 
SQLAggregation.fromField(f, this)).map(a => a.aggName -> a).toMap diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index 7dbdabee..526e8b4f 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -73,7 +73,7 @@ case class Field( lazy val windows: Option[WindowFunction] = functions.collectFirst { case th: WindowFunction => th } - def isWindow: Boolean = windows.isDefined + def isWindow: Boolean = windows.nonEmpty //.exists(_.partitionBy.nonEmpty) def update(request: SQLSearchRequest): Field = { windows match { From 97c7faf19144078a0662e202e74b36912bd7d0dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Thu, 27 Nov 2025 11:22:26 +0100 Subject: [PATCH 21/40] update logs for scroll and search --- .../elastic/client/ScrollApi.scala | 2 +- .../elastic/client/SearchApi.scala | 38 +++++++++---------- .../softnetwork/elastic/client/package.scala | 18 ++++++++- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala index 4489c6c7..6d738565 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala @@ -347,7 +347,7 @@ trait ScrollApi extends ElasticClientHelpers { val strategy = determineScrollStrategy(elasticQuery, aggregations) logger.info( - s"Using scroll strategy: $strategy for query on ${elasticQuery.indices.mkString(", ")}" + s"Using scroll strategy: $strategy for query \n$elasticQuery" ) strategy match { diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala index b5d0dcc5..a773c6d8 100644 --- 
a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -132,19 +132,19 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { val indices = elasticQuery.indices.mkString(",") logger.debug( - s"🔍 Searching with query \n${sql.getOrElse(query)}\nin indices '$indices'" + s"🔍 Searching with query \n$elasticQuery\nin indices '$indices'" ) executeSingleSearch(elasticQuery) match { case ElasticSuccess(Some(response)) => logger.info( - s"✅ Successfully executed search for query \n${sql.getOrElse(query)}\nin indices '$indices'" + s"✅ Successfully executed search for query \n$elasticQuery\nin indices '$indices'" ) val aggs = aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) ElasticResult.fromTry(parseResponse(response, fieldAliases, aggs)) match { case success @ ElasticSuccess(_) => logger.info( - s"✅ Successfully parsed search results for query \n${sql.getOrElse(query)}\nin indices '$indices'" + s"✅ Successfully parsed search results for query \n$elasticQuery\nin indices '$indices'" ) ElasticResult.success( ElasticResponse( @@ -170,8 +170,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { case ElasticSuccess(_) => val error = ElasticError( - message = - s"Failed to execute search for query \n${sql.getOrElse(query)}\nin indices '$indices'", + message = s"Failed to execute search for query \n$elasticQuery\nin indices '$indices'", index = Some(indices), operation = Some("search") ) @@ -231,19 +230,19 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { ) logger.debug( - s"🔍 Multi-searching with query \n${sql.getOrElse(query)}" + s"🔍 Multi-searching with query \n$elasticQueries" ) executeMultiSearch(elasticQueries) match { case ElasticSuccess(Some(response)) => logger.info( - s"✅ Successfully executed multi-search for query \n${sql.getOrElse(query)}" + s"✅ Successfully executed multi-search for query 
\n$elasticQueries" ) val aggs = aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) ElasticResult.fromTry(parseResponse(response, fieldAliases, aggs)) match { case success @ ElasticSuccess(_) => logger.info( - s"✅ Successfully parsed multi-search results for query '${sql.getOrElse(query)}'" + s"✅ Successfully parsed multi-search results for query '$elasticQueries'" ) ElasticResult.success( ElasticResponse( @@ -256,7 +255,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { ) case ElasticFailure(error) => logger.error( - s"❌ Failed to parse multi-search results for query \n${sql.getOrElse(query)}\n -> ${error.message}" + s"❌ Failed to parse multi-search results for query \n$elasticQueries\n -> ${error.message}" ) ElasticResult.failure( error.copy( @@ -267,14 +266,14 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { case ElasticSuccess(_) => val error = ElasticError( - message = s"Failed to execute multi-search for query \n${sql.getOrElse(query)}", + message = s"Failed to execute multi-search for query \n$elasticQueries", operation = Some("multiSearch") ) logger.error(s"❌ ${error.message}") ElasticResult.failure(error) case ElasticFailure(error) => logger.error( - s"❌ Failed to execute multi-search for query \n${sql.getOrElse(query)}\n -> ${error.message}" + s"❌ Failed to execute multi-search for query \n$elasticQueries\n -> ${error.message}" ) ElasticResult.failure( error.copy( @@ -358,13 +357,13 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { executeSingleSearchAsync(elasticQuery).flatMap { case ElasticSuccess(Some(response)) => logger.info( - s"✅ Successfully executed asynchronous search for query \n${sql.getOrElse(query)}\nin indices '$indices'" + s"✅ Successfully executed asynchronous search for query \n$elasticQuery\nin indices '$indices'" ) val aggs = aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) ElasticResult.fromTry(parseResponse(response, 
fieldAliases, aggs)) match { case success @ ElasticSuccess(_) => logger.info( - s"✅ Successfully parsed search results for query \n${sql.getOrElse(query)}\nin indices '$indices'" + s"✅ Successfully parsed search results for query \n$elasticQuery\nin indices '$indices'" ) Future.successful( ElasticResult.success( @@ -395,7 +394,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { val error = ElasticError( message = - s"Failed to execute asynchronous search for query \n${sql.getOrElse(query)}\nin indices '$indices'", + s"Failed to execute asynchronous search for query \n$elasticQuery\nin indices '$indices'", index = Some(elasticQuery.indices.mkString(",")), operation = Some("searchAsync") ) @@ -443,13 +442,13 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { executeMultiSearchAsync(elasticQueries).flatMap { case ElasticSuccess(Some(response)) => logger.info( - s"✅ Successfully executed asynchronous multi-search for query \n${sql.getOrElse(query)}" + s"✅ Successfully executed asynchronous multi-search for query \n$elasticQueries" ) val aggs = aggregations.map(kv => kv._1 -> implicitly[ClientAggregation](kv._2)) ElasticResult.fromTry(parseResponse(response, fieldAliases, aggs)) match { case success @ ElasticSuccess(_) => logger.info( - s"✅ Successfully parsed multi-search results for query '${sql.getOrElse(query)}'" + s"✅ Successfully parsed multi-search results for query '$elasticQueries'" ) Future.successful( ElasticResult.success( @@ -464,7 +463,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { ) case ElasticFailure(error) => logger.error( - s"❌ Failed to parse multi-search results for query \n${sql.getOrElse(query)}\n -> ${error.message}" + s"❌ Failed to parse multi-search results for query \n$elasticQueries\n -> ${error.message}" ) Future.successful( ElasticResult.failure( @@ -477,15 +476,14 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { case ElasticSuccess(_) => val 
error = ElasticError( - message = - s"Failed to execute asynchronous multi-search for query \n${sql.getOrElse(query)}", + message = s"Failed to execute asynchronous multi-search for query \n$elasticQueries", operation = Some("multiSearchAsync") ) logger.error(s"❌ ${error.message}") Future.successful(ElasticResult.failure(error)) case ElasticFailure(error) => logger.error( - s"❌ Failed to execute asynchronous multi-search for query \n${sql.getOrElse(query)}\n -> ${error.message}" + s"❌ Failed to execute asynchronous multi-search for query \n$elasticQueries\n -> ${error.message}" ) Future.successful( ElasticResult.failure( diff --git a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index 5e56c5f0..012082cd 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -73,7 +73,23 @@ package object client extends SerializationApi { indices: Seq[String], types: Seq[String] = Seq.empty, sql: Option[String] = None - ) + ) { + override def toString: String = s"""ElasticQuery: + | Indices: ${indices.mkString(",")} + | Types: ${types.mkString(",")} + | SQL: ${sql.getOrElse("")} + | Query: $query + |""".stripMargin + } + + case class ElasticQueries(queries: List[ElasticQuery], sql: Option[String] = None) { + val multiQuery: String = queries.map(_.query).mkString("\n") + + val sqlQuery: String = sql + .orElse( + Option(queries.flatMap(_.sql).mkString("\nUNION ALL\n")) + ) + .getOrElse("") case class ElasticQueries(queries: List[ElasticQuery], sql: Option[String] = None) From 98080d9c161c4cc2e9ce3a75bbee7ae6c1eeec9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Thu, 27 Nov 2025 11:22:37 +0100 Subject: [PATCH 22/40] update logs for scroll and search --- .../scala/app/softnetwork/elastic/client/package.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index 012082cd..3ac38763 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -91,7 +91,12 @@ package object client extends SerializationApi { ) .getOrElse("") - case class ElasticQueries(queries: List[ElasticQuery], sql: Option[String] = None) + override def toString: String = s""" + |ElasticQueries: + | SQL: ${sql.getOrElse(sqlQuery)} + | Multiquery: $multiQuery + |""".stripMargin + } /** Retry configuration */ From 8dc008246ab0469406cc032698dfe52733ed1d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Thu, 27 Nov 2025 11:23:22 +0100 Subject: [PATCH 23/40] do not update order by for window functions --- .../elastic/sql/function/aggregate/package.scala | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index 0d59e1da..12287ae8 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -155,8 +155,7 @@ package object aggregate { .update(request) .asInstanceOf[FirstValue] .copy( - identifier = identifier.update(request), - orderBy = orderBy.update(request) + identifier = identifier.update(request) ) } @@ -175,8 +174,7 @@ package object aggregate { .update(request) .asInstanceOf[LastValue] .copy( - identifier = identifier.update(request), - orderBy = orderBy.update(request) + identifier = identifier.update(request) ) } @@ -196,7 +194,6 @@ package object aggregate { .asInstanceOf[ArrayAgg] .copy( identifier = identifier.update(request), - orderBy = orderBy.update(request), limit = limit.orElse(request.limit) ) 
override def multivalued: Boolean = true From 0ca06f95d945de0d53fa0ad44a0484c3e29fbc57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Fri, 28 Nov 2025 15:58:01 +0100 Subject: [PATCH 24/40] add support for multiple bucket trees (for windowing) and add aggregations to the corresponding bucket when required --- .../sql/bridge/ElasticAggregation.scala | 384 +++++++++--------- .../elastic/sql/bridge/package.scala | 45 +- .../elastic/client/SearchApi.scala | 8 +- .../softnetwork/elastic/client/package.scala | 6 +- .../client/ElasticConversionSpec.scala | 6 +- .../sql/bridge/ElasticAggregation.scala | 384 +++++++++--------- .../elastic/sql/bridge/package.scala | 49 ++- .../sql/function/aggregate/package.scala | 54 ++- .../app/softnetwork/elastic/sql/package.scala | 51 ++- .../elastic/sql/query/GroupBy.scala | 204 +++++++++- .../elastic/sql/query/SQLSearchRequest.scala | 24 +- .../elastic/sql/query/Select.scala | 19 +- 12 files changed, 780 insertions(+), 454 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index a0c9f63f..5ba6de78 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -20,8 +20,9 @@ import app.softnetwork.elastic.sql.PainlessContext import app.softnetwork.elastic.sql.`type`.SQLTemporal import app.softnetwork.elastic.sql.query.{ Asc, - Bucket, BucketIncludesExcludes, + BucketNode, + BucketTree, Criteria, Desc, Field, @@ -79,7 +80,8 @@ case class ElasticAggregation( aggType: AggregateFunction, agg: AbstractAggregation, direction: Option[SortOrder] = None, - nestedElement: Option[NestedElement] = None + nestedElement: Option[NestedElement] = None, + bucketPath: String = "" ) { val nested: Boolean = nestedElement.nonEmpty val filtered: Boolean = 
filteredAgg.nonEmpty @@ -115,7 +117,7 @@ object ElasticAggregation { val distinct = identifier.distinct - val aggType = { + var aggType = { if (isBucketScript) { BucketScriptAggregation(identifier) } else @@ -208,13 +210,13 @@ object ElasticAggregation { sort.order match { case Some(Desc) => th.window match { - case LAST_VALUE => FieldSort(sort.field.aliasOrName).asc() - case _ => FieldSort(sort.field.aliasOrName).desc() + case LAST_VALUE => FieldSort(sort.field.name).asc() + case _ => FieldSort(sort.field.name).desc() } case _ => th.window match { - case LAST_VALUE => FieldSort(sort.field.aliasOrName).desc() - case _ => FieldSort(sort.field.aliasOrName).asc() + case LAST_VALUE => FieldSort(sort.field.name).desc() + case _ => FieldSort(sort.field.name).asc() } } ) @@ -224,6 +226,7 @@ object ElasticAggregation { case Some(sqlAgg) => sqlAgg.aggType match { case bsa: BucketScriptAggregation => + aggType = bsa extractMetricsPathForBucketScript(bsa, allAggregations.values.toSeq) case _ => Map.empty } @@ -284,6 +287,12 @@ object ElasticAggregation { Some(nestedAgg) } + val bucketPath = + aggType.bucketPath match { + case paths if paths.isEmpty => identifier.bucketPath + case other => other + } + ElasticAggregation( aggPath.mkString("."), field, @@ -293,205 +302,218 @@ object ElasticAggregation { aggType = aggType, agg = _agg, direction = direction, - nestedElement = nestedElement + nestedElement = nestedElement, + bucketPath = bucketPath ) } def buildBuckets( - buckets: Seq[Bucket], + buckets: Seq[Seq[BucketNode]], bucketsDirection: Map[String, SortOrder], - aggregations: Seq[AbstractAggregation], + aggs: Seq[ElasticAggregation], aggregationsDirection: Map[String, SortOrder], having: Option[Criteria], nested: Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] - ): Option[Aggregation] = { - buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => - // Determine the bucketPath of the current bucket - val currentNestedPath = 
bucket.identifier.path - - val aggScript = - if (!bucket.isBucketScript && bucket.shouldBeScripted) { - val context = PainlessContext() - val painless = bucket.painless(Some(context)) - Some(Script(s"$context$painless").lang("painless")) - } else { - None - } + ): Seq[Aggregation] = { + println( + s"[DEBUG] buildBuckets called with buckets: \n${BucketTree(buckets.flatMap(_.headOption))}" + ) + buckets.flatMap { tree => + tree.reverse.foldLeft(Option.empty[Aggregation]) { (current, node) => + val currentBucketPath = node.bucketPath + + val bucket = node.bucket + + val aggregations = + aggs.filter(agg => agg.bucketPath == currentBucketPath).map(_.agg) + + // Determine the nested path of the current bucket + val currentBucketNestedPath = bucket.identifier.path - var agg: Aggregation = { - bucket.out match { - case _: SQLTemporal => - val functions = bucket.identifier.functions - val interval: Option[DateHistogramInterval] = - if (functions.size == 1) { - functions.head match { - case trunc: DateTrunc => - trunc.unit match { - case TimeUnit.YEARS => Option(DateHistogramInterval.Year) - case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) - case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) - case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) - case TimeUnit.DAYS => Option(DateHistogramInterval.Day) - case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) - case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) - case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) - case _ => None - } - case _ => None + val aggScript = + if (!bucket.isBucketScript && bucket.shouldBeScripted) { + val context = PainlessContext() + val painless = bucket.painless(Some(context)) + Some(Script(s"$context$painless").lang("painless")) + } else { + None + } + + var agg: Aggregation = { + bucket.out match { + case _: SQLTemporal => + val functions = bucket.identifier.functions + val interval: Option[DateHistogramInterval] = + if (functions.size == 1) 
{ + functions.head match { + case trunc: DateTrunc => + trunc.unit match { + case TimeUnit.YEARS => Option(DateHistogramInterval.Year) + case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) + case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) + case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) + case TimeUnit.DAYS => Option(DateHistogramInterval.Day) + case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) + case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) + case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) + case _ => None + } + case _ => None + } + } else { + None } - } else { - None + + aggScript match { + case Some(script) => + // Scripted date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .script(script) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .script(script) + .minDocCount(1) + } + case _ => + // Standard date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .field(currentBucketNestedPath) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .field(currentBucketNestedPath) + .minDocCount(1) + } } - aggScript match { - case Some(script) => - // Scripted date histogram - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .script(script) - .minDocCount(1) - .order(direction match { - case Asc => 
HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + case _ => + aggScript match { + case Some(script) => + // Scripted terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + } + case _ => + // Standard terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + termsAgg(bucket.name, currentBucketNestedPath) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + termsAgg(bucket.name, currentBucketNestedPath) + .minDocCount(1) + } + } + } + } + agg match { + case termsAgg: TermsAggregation => + bucket.size.foreach(s => agg = termsAgg.size(s)) + having match { + case Some(criteria) => + criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.includeRegex(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.includeExactValues(values.toArray) case _ => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .script(script) - .minDocCount(1) } - case _ => - // Standard date histogram - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .field(currentNestedPath) - .minDocCount(1) - .order(direction match { - case Asc => HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { + case 
BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.excludeRegex(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.excludeExactValues(values.toArray) case _ => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .field(currentNestedPath) - .minDocCount(1) } + case _ => } - case _ => - aggScript match { - case Some(script) => - // Scripted terms aggregation - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - TermsAggregation(bucket.name) - .script(script) - .minDocCount(1) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case _ => - TermsAggregation(bucket.name) - .script(script) - .minDocCount(1) - } + } + current match { + case Some(subAgg) => + agg match { + case termsAgg: TermsAggregation => + agg = termsAgg.subaggs(aggregations :+ subAgg) + case dateHistogramAgg: DateHistogramAggregation => + agg = dateHistogramAgg.subaggs(aggregations :+ subAgg) case _ => - // Standard terms aggregation - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - termsAgg(bucket.name, currentNestedPath) - .minDocCount(1) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case _ => - termsAgg(bucket.name, currentNestedPath) - .minDocCount(1) - } } - } - } - agg match { - case termsAgg: TermsAggregation => - bucket.size.foreach(s => agg = termsAgg.size(s)) - having match { - case Some(criteria) => - criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = termsAgg.includeRegex(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = termsAgg.includeExactValues(values.toArray) - case _ => - } - criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { 
- case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = termsAgg.excludeRegex(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = termsAgg.excludeExactValues(values.toArray) - case _ => - } - case _ => - } - case _ => - } - current match { - case Some(subAgg) => - agg match { - case termsAgg: TermsAggregation => - agg = termsAgg.subaggs(Seq(subAgg)) - case dateHistogramAgg: DateHistogramAggregation => - agg = dateHistogramAgg.subaggs(Seq(subAgg)) - case _ => - } - Some(agg) - case None => - val subaggs = - having match { - case Some(criteria) => - val script = metricSelectorForBucket( - criteria, - nested, - allElasticAggregations - ) - - if (script.nonEmpty) { - val bucketSelector = - bucketSelectorAggregation( - "having_filter", - Script(script), - extractMetricsPathForBucket( - criteria, - nested, - allElasticAggregations + Some(agg) + case None => + val subaggs = + having match { + case Some(criteria) => + val script = metricSelectorForBucket( + criteria, + nested, + allElasticAggregations + ) + + if (script.nonEmpty) { + val bucketSelector = + bucketSelectorAggregation( + "having_filter", + Script(script), + extractMetricsPathForBucket( + criteria, + nested, + allElasticAggregations + ) ) - ) - aggregations :+ bucketSelector - } else { + aggregations :+ bucketSelector + } else { + aggregations + } + case None => aggregations - } - case None => - aggregations - } + } - agg match { - case termsAgg: TermsAggregation => - val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => - kv._2 match { - case Asc => TermsOrder(kv._1, asc = true) - case _ => TermsOrder(kv._1, asc = false) + agg match { + case termsAgg: TermsAggregation => + val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => + kv._2 match { + case Asc => TermsOrder(kv._1, asc = true) + case _ => TermsOrder(kv._1, asc = false) + } } - } - if (aggregationsWithOrder.nonEmpty) - agg = 
termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) - else - agg = termsAgg.copy(subaggs = subaggs) - case dateHistogramAggregation: DateHistogramAggregation => - agg = dateHistogramAggregation.copy(subaggs = subaggs) - } - Some(agg) + if (aggregationsWithOrder.nonEmpty) + agg = termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) + else + agg = termsAgg.copy(subaggs = subaggs) + case dateHistogramAggregation: DateHistogramAggregation => + agg = dateHistogramAggregation.copy(subaggs = subaggs) + } + Some(agg) + } } } } diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index c1b31a50..69be62e9 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -151,7 +151,7 @@ package object bridge { ): Seq[AbstractAggregation] = { val notNestedAggregations = aggregations.filterNot(_.nested) - val notNestedBuckets = request.buckets.filterNot(_.nested) + val notNestedBuckets = request.bucketTree.filterNot(_.bucket.nested) val rootAggregations = notNestedAggregations match { case Nil => @@ -164,8 +164,8 @@ package object bridge { None, aggregations ) match { - case Some(b) => Seq(b) - case _ => Seq.empty + case Nil => Seq.empty + case aggs => aggs } buckets case aggs => @@ -179,14 +179,18 @@ package object bridge { val buckets = ElasticAggregation.buildBuckets( notNestedBuckets, request.sorts -- directions.keys, - aggregations, + aggs, directions, request.having.flatMap(_.criteria), None, aggs ) match { - case Some(b) => Seq(b) - case _ => aggregations + case Nil => aggs.map(_.agg) + case aggs => + if (request.groupBy.isEmpty && request.windowFunctions.exists(_.isWindowing)) + notNestedAggregations.filter(_.bucketPath.isEmpty).map(_.agg) ++ aggs + else + aggs } buckets } @@ -210,12 +214,14 @@ package object bridge { // Group nested buckets by their 
nested path val nestedGroupedBuckets = - request.buckets - .filter(_.nested) - .groupBy( - _.nestedBucket.getOrElse( - throw new IllegalArgumentException( - "Nested bucket must have a nested element" + request.bucketTree + .filter(_.bucket.nested) + .map(tree => + tree.groupBy( + _.bucket.nestedBucket.getOrElse( + throw new IllegalArgumentException( + "Nested bucket must have a nested element" + ) ) ) ) @@ -235,17 +241,16 @@ package object bridge { // Get the buckets for this nested element val nestedBuckets = - nestedGroupedBuckets.getOrElse(n.innerHitsName, Seq.empty) + nestedGroupedBuckets.map(_.getOrElse(n.innerHitsName, Seq.empty)) val notRelatedAggregationsToBuckets = elasticAggregations .filterNot { ea => - nestedBuckets.exists(nb => nb.identifier.path == ea.sourceField) + nestedBuckets.flatten.exists(nb => nb.bucket.identifier.path == ea.sourceField) } - .map(_.agg) val relatedAggregationsToBuckets = elasticAggregations .filter { ea => - nestedBuckets.exists(nb => nb.identifier.path == ea.sourceField) + nestedBuckets.flatten.exists(nb => nb.bucket.identifier.path == ea.sourceField) } .map(_.agg) @@ -273,8 +278,12 @@ package object bridge { Some(n), aggregations ) match { - case Some(b) => Seq(b) - case _ => notRelatedAggregationsToBuckets + case Nil => notRelatedAggregationsToBuckets.map(_.agg) + case aggs => + if (request.groupBy.isEmpty && request.windowFunctions.exists(_.isWindowing)) + notRelatedAggregationsToBuckets.filter(_.bucketPath.isEmpty).map(_.agg) ++ aggs + else + aggs } val children = n.children diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala index a773c6d8..afd3c30b 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -68,7 +68,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { collection.immutable.Seq(single.sources: 
_*), sql = Some(sql.query) ) - if (single.windowBuckets.nonEmpty) + if (single.windowFunctions.exists(_.isWindowing)) searchWithWindowEnrichment(sql, single) else singleSearch(elasticQuery, single.fieldAliases, single.sqlAggregations) @@ -131,7 +131,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { val query = elasticQuery.query val indices = elasticQuery.indices.mkString(",") - logger.debug( + logger.info( s"🔍 Searching with query \n$elasticQuery\nin indices '$indices'" ) @@ -1168,7 +1168,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { request .copy( select = request.select.copy(fields = request.windowFields), - groupBy = request.groupBy.map(_.copy(buckets = request.windowBuckets)), + groupBy = None, //request.groupBy.map(_.copy(buckets = request.windowBuckets)), orderBy = None, // Not needed for aggregations limit = None // Need all buckets ) @@ -1236,7 +1236,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { ): SQLSearchRequest = { // Remove window function fields from SELECT - val baseFields = request.select.fields.filterNot(_.windows.nonEmpty) + val baseFields = request.select.fields.filterNot(_.identifier.hasWindow) // Create modified request val baseRequest = request diff --git a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index 3ac38763..3cf886f3 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -168,7 +168,8 @@ package object client extends SerializationApi { aggType: AggregationType.AggregationType, distinct: Boolean, sourceField: String, - windowing: Boolean + windowing: Boolean, + bucketPath: String ) { def multivalued: Boolean = aggType == AggregationType.ArrayAgg def singleValued: Boolean = !multivalued @@ -191,7 +192,8 @@ package object client extends SerializationApi { aggType, 
agg.distinct, agg.sourceField, - agg.aggType.isWindowing + agg.aggType.isWindowing, + agg.bucketPath ) } } diff --git a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala index ac977bf8..ea04c8df 100644 --- a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala +++ b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala @@ -190,7 +190,8 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver aggType = AggregationType.ArrayAgg, distinct = false, "name", - windowing = true + windowing = true, + "" ) ) ) match { @@ -643,7 +644,8 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver aggType = AggregationType.ArrayAgg, distinct = false, "name", - windowing = true + windowing = true, + "" ) ) ) match { diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index cb1a390e..769e6265 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -20,8 +20,9 @@ import app.softnetwork.elastic.sql.PainlessContext import app.softnetwork.elastic.sql.`type`.SQLTemporal import app.softnetwork.elastic.sql.query.{ Asc, - Bucket, BucketIncludesExcludes, + BucketNode, + BucketTree, Criteria, Desc, Field, @@ -79,7 +80,8 @@ case class ElasticAggregation( aggType: AggregateFunction, agg: AbstractAggregation, direction: Option[SortOrder] = None, - nestedElement: Option[NestedElement] = None + nestedElement: Option[NestedElement] = None, + bucketPath: String = "" ) { val nested: Boolean = nestedElement.nonEmpty val filtered: Boolean = filteredAgg.nonEmpty @@ -115,7 +117,7 @@ object ElasticAggregation { val distinct = 
identifier.distinct - val aggType = { + var aggType = { if (isBucketScript) { BucketScriptAggregation(identifier) } else @@ -208,13 +210,13 @@ object ElasticAggregation { sort.order match { case Some(Desc) => th.window match { - case LAST_VALUE => FieldSort(sort.field.aliasOrName).asc() - case _ => FieldSort(sort.field.aliasOrName).desc() + case LAST_VALUE => FieldSort(sort.field.name).asc() + case _ => FieldSort(sort.field.name).desc() } case _ => th.window match { - case LAST_VALUE => FieldSort(sort.field.aliasOrName).desc() - case _ => FieldSort(sort.field.aliasOrName).asc() + case LAST_VALUE => FieldSort(sort.field.name).desc() + case _ => FieldSort(sort.field.name).asc() } } ) @@ -224,6 +226,7 @@ object ElasticAggregation { case Some(sqlAgg) => sqlAgg.aggType match { case bsa: BucketScriptAggregation => + aggType = bsa extractMetricsPathForBucketScript(bsa, allAggregations.values.toSeq) case _ => Map.empty } @@ -284,6 +287,12 @@ object ElasticAggregation { Some(nestedAgg) } + val bucketPath = + aggType.bucketPath match { + case paths if paths.isEmpty => identifier.bucketPath + case other => other + } + ElasticAggregation( aggPath.mkString("."), field, @@ -293,205 +302,218 @@ object ElasticAggregation { aggType = aggType, agg = _agg, direction = direction, - nestedElement = nestedElement + nestedElement = nestedElement, + bucketPath = bucketPath ) } def buildBuckets( - buckets: Seq[Bucket], + buckets: Seq[Seq[BucketNode]], bucketsDirection: Map[String, SortOrder], - aggregations: Seq[AbstractAggregation], + aggs: Seq[ElasticAggregation], aggregationsDirection: Map[String, SortOrder], having: Option[Criteria], nested: Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] - ): Option[Aggregation] = { - buckets.reverse.foldLeft(Option.empty[Aggregation]) { (current, bucket) => - // Determine the bucketPath of the current bucket - val currentNestedPath = bucket.identifier.path - - val aggScript = - if (!bucket.isBucketScript && 
bucket.shouldBeScripted) { - val context = PainlessContext() - val painless = bucket.painless(Some(context)) - Some(Script(s"$context$painless").lang("painless")) - } else { - None - } + ): Seq[Aggregation] = { + println( + s"[DEBUG] buildBuckets called with buckets: \n${BucketTree(buckets.flatMap(_.headOption))}" + ) + buckets.flatMap { tree => + tree.reverse.foldLeft(Option.empty[Aggregation]) { (current, node) => + val currentBucketPath = node.bucketPath + + val bucket = node.bucket + + val aggregations = + aggs.filter(agg => agg.bucketPath == currentBucketPath).map(_.agg) + + // Determine the nested path of the current bucket + val currentBucketNestedPath = bucket.identifier.path - var agg: Aggregation = { - bucket.out match { - case _: SQLTemporal => - val functions = bucket.identifier.functions - val interval: Option[DateHistogramInterval] = - if (functions.size == 1) { - functions.head match { - case trunc: DateTrunc => - trunc.unit match { - case TimeUnit.YEARS => Option(DateHistogramInterval.Year) - case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) - case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) - case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) - case TimeUnit.DAYS => Option(DateHistogramInterval.Day) - case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) - case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) - case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) - case _ => None - } - case _ => None + val aggScript = + if (!bucket.isBucketScript && bucket.shouldBeScripted) { + val context = PainlessContext() + val painless = bucket.painless(Some(context)) + Some(Script(s"$context$painless").lang("painless")) + } else { + None + } + + var agg: Aggregation = { + bucket.out match { + case _: SQLTemporal => + val functions = bucket.identifier.functions + val interval: Option[DateHistogramInterval] = + if (functions.size == 1) { + functions.head match { + case trunc: DateTrunc => + trunc.unit match { 
+ case TimeUnit.YEARS => Option(DateHistogramInterval.Year) + case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) + case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) + case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) + case TimeUnit.DAYS => Option(DateHistogramInterval.Day) + case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) + case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) + case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) + case _ => None + } + case _ => None + } + } else { + None } - } else { - None + + aggScript match { + case Some(script) => + // Scripted date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, interval = interval) + .script(script) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, interval = interval) + .script(script) + .minDocCount(1) + } + case _ => + // Standard date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, interval = interval) + .field(currentBucketNestedPath) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, interval = interval) + .field(currentBucketNestedPath) + .minDocCount(1) + } } - aggScript match { - case Some(script) => - // Scripted date histogram - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, interval = interval) - .script(script) - .minDocCount(1) - .order(direction match { - case Asc => HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + case _ => + aggScript 
match { + case Some(script) => + // Scripted terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + } + case _ => + // Standard terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + termsAgg(bucket.name, currentBucketNestedPath) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + termsAgg(bucket.name, currentBucketNestedPath) + .minDocCount(1) + } + } + } + } + agg match { + case termsAgg: TermsAggregation => + bucket.size.foreach(s => agg = termsAgg.size(s)) + having match { + case Some(criteria) => + criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.include(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.include(values.toArray) case _ => - DateHistogramAggregation(bucket.name, interval = interval) - .script(script) - .minDocCount(1) } - case _ => - // Standard date histogram - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, interval = interval) - .field(currentNestedPath) - .minDocCount(1) - .order(direction match { - case Asc => HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.exclude(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = 
termsAgg.exclude(values.toArray) case _ => - DateHistogramAggregation(bucket.name, interval = interval) - .field(currentNestedPath) - .minDocCount(1) } + case _ => } - case _ => - aggScript match { - case Some(script) => - // Scripted terms aggregation - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - TermsAggregation(bucket.name) - .script(script) - .minDocCount(1) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case _ => - TermsAggregation(bucket.name) - .script(script) - .minDocCount(1) - } + } + current match { + case Some(subAgg) => + agg match { + case termsAgg: TermsAggregation => + agg = termsAgg.subaggs(aggregations :+ subAgg) + case dateHistogramAgg: DateHistogramAggregation => + agg = dateHistogramAgg.subaggs(aggregations :+ subAgg) case _ => - // Standard terms aggregation - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - termsAgg(bucket.name, currentNestedPath) - .minDocCount(1) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case _ => - termsAgg(bucket.name, currentNestedPath) - .minDocCount(1) - } } - } - } - agg match { - case termsAgg: TermsAggregation => - bucket.size.foreach(s => agg = termsAgg.size(s)) - having match { - case Some(criteria) => - criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = termsAgg.include(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = termsAgg.include(values.toArray) - case _ => - } - criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = termsAgg.exclude(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = termsAgg.exclude(values.toArray) 
- case _ => - } - case _ => - } - case _ => - } - current match { - case Some(subAgg) => - agg match { - case termsAgg: TermsAggregation => - agg = termsAgg.subaggs(Seq(subAgg)) - case dateHistogramAgg: DateHistogramAggregation => - agg = dateHistogramAgg.subaggs(Seq(subAgg)) - case _ => - } - Some(agg) - case None => - val subaggs = - having match { - case Some(criteria) => - val script = metricSelectorForBucket( - criteria, - nested, - allElasticAggregations - ) - - if (script.nonEmpty) { - val bucketSelector = - bucketSelectorAggregation( - "having_filter", - Script(script), - extractMetricsPathForBucket( - criteria, - nested, - allElasticAggregations + Some(agg) + case None => + val subaggs = + having match { + case Some(criteria) => + val script = metricSelectorForBucket( + criteria, + nested, + allElasticAggregations + ) + + if (script.nonEmpty) { + val bucketSelector = + bucketSelectorAggregation( + "having_filter", + Script(script), + extractMetricsPathForBucket( + criteria, + nested, + allElasticAggregations + ) ) - ) - aggregations :+ bucketSelector - } else { + aggregations :+ bucketSelector + } else { + aggregations + } + case None => aggregations - } - case None => - aggregations - } + } - agg match { - case termsAgg: TermsAggregation => - val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => - kv._2 match { - case Asc => TermsOrder(kv._1, asc = true) - case _ => TermsOrder(kv._1, asc = false) + agg match { + case termsAgg: TermsAggregation => + val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => + kv._2 match { + case Asc => TermsOrder(kv._1, asc = true) + case _ => TermsOrder(kv._1, asc = false) + } } - } - if (aggregationsWithOrder.nonEmpty) - agg = termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) - else - agg = termsAgg.copy(subaggs = subaggs) - case dateHistogramAggregation: DateHistogramAggregation => - agg = dateHistogramAggregation.copy(subaggs = subaggs) - } - 
Some(agg) + if (aggregationsWithOrder.nonEmpty) + agg = termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) + else + agg = termsAgg.copy(subaggs = subaggs) + case dateHistogramAggregation: DateHistogramAggregation => + agg = dateHistogramAggregation.copy(subaggs = subaggs) + } + Some(agg) + } } } } diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index 9dd7b421..7f7843b4 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -147,7 +147,7 @@ package object bridge { ): Seq[AbstractAggregation] = { val notNestedAggregations = aggregations.filterNot(_.nested) - val notNestedBuckets = request.buckets.filterNot(_.nested) + val notNestedBuckets = request.bucketTree.filterNot(_.bucket.nested) val rootAggregations = notNestedAggregations match { case Nil => @@ -160,8 +160,8 @@ package object bridge { None, aggregations ) match { - case Some(b) => Seq(b) - case _ => Seq.empty + case Nil => Seq.empty + case aggs => aggs } buckets case aggs => @@ -170,19 +170,21 @@ package object bridge { .map(agg => agg.agg.name -> agg.direction.get) .toMap - val aggregations = aggs.map(_.agg) - val buckets = ElasticAggregation.buildBuckets( notNestedBuckets, request.sorts -- directions.keys, - aggregations, + aggs, directions, request.having.flatMap(_.criteria), None, aggs ) match { - case Some(b) => Seq(b) - case _ => aggregations + case Nil => aggs.map(_.agg) + case aggs => + if (request.groupBy.isEmpty && request.windowFunctions.exists(_.isWindowing)) + notNestedAggregations.filter(_.bucketPath.isEmpty).map(_.agg) ++ aggs + else + aggs } buckets } @@ -206,12 +208,14 @@ package object bridge { // Group nested buckets by their nested path val nestedGroupedBuckets = - request.buckets - .filter(_.nested) - .groupBy( - _.nestedBucket.getOrElse( - throw 
new IllegalArgumentException( - "Nested bucket must have a nested element" + request.bucketTree + .filter(_.bucket.nested) + .map(tree => + tree.groupBy( + _.bucket.nestedBucket.getOrElse( + throw new IllegalArgumentException( + "Nested bucket must have a nested element" + ) ) ) ) @@ -231,17 +235,16 @@ package object bridge { // Get the buckets for this nested element val nestedBuckets = - nestedGroupedBuckets.getOrElse(n.innerHitsName, Seq.empty) + nestedGroupedBuckets.map(_.getOrElse(n.innerHitsName, Seq.empty)) val notRelatedAggregationsToBuckets = elasticAggregations .filterNot { ea => - nestedBuckets.exists(nb => nb.identifier.path == ea.sourceField) + nestedBuckets.flatten.exists(nb => nb.bucket.identifier.path == ea.sourceField) } - .map(_.agg) val relatedAggregationsToBuckets = elasticAggregations .filter { ea => - nestedBuckets.exists(nb => nb.identifier.path == ea.sourceField) + nestedBuckets.flatten.exists(nb => nb.bucket.identifier.path == ea.sourceField) } .map(_.agg) @@ -269,8 +272,12 @@ package object bridge { Some(n), aggregations ) match { - case Some(b) => Seq(b) - case _ => notRelatedAggregationsToBuckets + case Nil => notRelatedAggregationsToBuckets.map(_.agg) + case aggs => + if (request.groupBy.isEmpty && request.windowFunctions.exists(_.isWindowing)) + notRelatedAggregationsToBuckets.filter(_.bucketPath.isEmpty).map(_.agg) ++ aggs + else + aggs } val children = n.children @@ -532,7 +539,7 @@ package object bridge { case _ => _search } - if (allAggregations.nonEmpty || buckets.nonEmpty) { + if (allAggregations.nonEmpty && fields.isEmpty) { _search size 0 fetchSource false } else { limit match { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index 12287ae8..6349c87c 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ 
b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -16,8 +16,15 @@ package app.softnetwork.elastic.sql.function -import app.softnetwork.elastic.sql.query.{Bucket, Field, Limit, OrderBy, SQLSearchRequest} -import app.softnetwork.elastic.sql.{asString, Expr, Identifier, TokenRegex, Updateable} +import app.softnetwork.elastic.sql.query.{ + Bucket, + BucketPath, + Field, + Limit, + OrderBy, + SQLSearchRequest +} +import app.softnetwork.elastic.sql.{Expr, Identifier, TokenRegex, Updateable} package object aggregate { @@ -33,6 +40,9 @@ package object aggregate { /** Indicates whether this aggregation is a windowing function with partitioning or not */ def isWindowing: Boolean = false + + def bucketPath: String = "" + } case object COUNT extends Expr("COUNT") with AggregateFunction @@ -77,12 +87,21 @@ package object aggregate { override def isBucketScript: Boolean = true + lazy val aggregations: Seq[AggregateFunction] = FunctionUtils.aggregateFunctions(identifier) + + // Get the longest bucket path among the aggregations involved in the bucket script + // TODO we should check that all bucket paths among the aggregations belong to the same buckets tree + override lazy val bucketPath: String = + aggregations.map(_.bucketPath).distinct.sortBy(_.length).reverse.headOption.getOrElse("") + override def update(request: SQLSearchRequest): BucketScriptAggregation = { val identifiers = FunctionUtils.aggregateIdentifiers(identifier) val params = identifiers.flatMap { case identifier: Identifier => val name = identifier.metricName.getOrElse(identifier.aliasOrName) - Some(name -> request.fieldAliases.getOrElse(identifier.identifierName, name)) + Some( + name -> request.fieldAliases.getOrElse(identifier.identifierName, name) + ) // TODO may be be a path case _ => None }.toMap this.copy(params = params) @@ -105,6 +124,8 @@ package object aggregate { lazy val buckets: Seq[Bucket] = partitionBy.map(identifier => Bucket(identifier, None)) + override lazy 
val bucketPath: String = BucketPath(buckets).path + lazy val bucketNames: Map[String, Bucket] = buckets.map { b => b.identifier.identifierName -> b }.toMap @@ -126,16 +147,16 @@ package object aggregate { val updated = this .withPartitionBy(partitionBy = partitionBy.map(_.update(request))) updated.withFields( - fields = if (isWindowing) { - request.select.fields - .filterNot(field => - field.isAggregation || request.bucketNames.keys.toSeq - .contains(field.identifier.identifierName) - ) - .filterNot(f => request.excludes.contains(f.sourceField)) - } else { - updated.fields - } + fields = request.select.fields + .filterNot(field => + field.isAggregation || request.bucketNames.keys.toSeq + .contains(field.identifier.identifierName) + ) + .filterNot(field => + updated.bucketNames.keys.toSeq + .contains(field.identifier.identifierName) + ) + .filterNot(f => request.excludes.contains(f.sourceField)) ) } } @@ -155,7 +176,8 @@ package object aggregate { .update(request) .asInstanceOf[FirstValue] .copy( - identifier = identifier.update(request) + identifier = identifier.update(request), + orderBy = orderBy.update(request) ) } @@ -174,7 +196,8 @@ package object aggregate { .update(request) .asInstanceOf[LastValue] .copy( - identifier = identifier.update(request) + identifier = identifier.update(request), + orderBy = orderBy.update(request) ) } @@ -194,6 +217,7 @@ package object aggregate { .asInstanceOf[ArrayAgg] .copy( identifier = identifier.update(request), + orderBy = orderBy.update(request), limit = limit.orElse(request.limit) ) override def multivalued: Boolean = true diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala index 420db60d..d58d26d7 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala @@ -16,7 +16,13 @@ package app.softnetwork.elastic -import 
app.softnetwork.elastic.sql.function.aggregate.{COUNT, MAX, MIN} +import app.softnetwork.elastic.sql.function.aggregate.{ + AggregateFunction, + COUNT, + MAX, + MIN, + WindowFunction +} import app.softnetwork.elastic.sql.function.geo.DistanceUnit import app.softnetwork.elastic.sql.function.time.CurrentFunction import app.softnetwork.elastic.sql.operator._ @@ -625,6 +631,10 @@ package object sql { def bucket: Option[Bucket] def hasBucket: Boolean = bucket.isDefined + lazy val aggregations: Seq[AggregateFunction] = FunctionUtils.aggregateFunctions(this) + + def bucketPath: String + lazy val allMetricsPath: Map[String, String] = { metricName match { case Some(name) => Map(name -> name) @@ -804,6 +814,14 @@ package object sql { case g: GenericIdentifier => g.copy(nested = nested) case _ => this } + + lazy val windows: Option[WindowFunction] = + functions.collectFirst { case th: WindowFunction => th } + + def hasWindow: Boolean = windows.nonEmpty + + def isWindowing: Boolean = windows.exists(_.partitionBy.nonEmpty) + } object Identifier { @@ -824,7 +842,8 @@ package object sql { functions: List[Function] = List.empty, fieldAlias: Option[String] = None, bucket: Option[Bucket] = None, - nestedElement: Option[NestedElement] = None + nestedElement: Option[NestedElement] = None, + bucketPath: String = "" ) extends Identifier { def withFunctions(functions: List[Function]): Identifier = this.copy(functions = functions) @@ -836,6 +855,19 @@ package object sql { } def update(request: SQLSearchRequest): Identifier = { + val bucketPath: String = + request.groupBy match { + case Some(gb) => + BucketPath( + gb.buckets.map(b => request.bucketNames.getOrElse(b.identifier.identifierName, b)) + ).path + case None /*if this.bucketPath.isEmpty*/ => + aggregateFunction match { + case Some(af) => af.bucketPath + case _ => this.bucketPath + } + //case _ => this.bucketPath + } val parts: Seq[String] = name.split("\\.").toSeq val tableAlias = parts.head if 
(request.tableAliases.values.toSeq.contains(tableAlias)) { @@ -854,7 +886,8 @@ package object sql { limit = tuple._2._2, fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias), bucket = request.bucketNames.get(identifierName).orElse(bucket), - nestedElement = nestedElement + nestedElement = nestedElement, + bucketPath = bucketPath ) .withFunctions(this.updateFunctions(request)) case Some(tuple) if nested => @@ -864,7 +897,8 @@ package object sql { name = s"${tuple._2._1}.${parts.tail.mkString(".")}", limit = tuple._2._2, fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias), - bucket = request.bucketNames.get(identifierName).orElse(bucket) + bucket = request.bucketNames.get(identifierName).orElse(bucket), + bucketPath = bucketPath ) .withFunctions(this.updateFunctions(request)) case None if nested => @@ -872,7 +906,8 @@ package object sql { .copy( tableAlias = Some(tableAlias), fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias), - bucket = request.bucketNames.get(identifierName).orElse(bucket) + bucket = request.bucketNames.get(identifierName).orElse(bucket), + bucketPath = bucketPath ) .withFunctions(this.updateFunctions(request)) case _ => @@ -880,14 +915,16 @@ package object sql { tableAlias = Some(tableAlias), name = parts.tail.mkString("."), fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias), - bucket = request.bucketNames.get(identifierName).orElse(bucket) + bucket = request.bucketNames.get(identifierName).orElse(bucket), + bucketPath = bucketPath ) } } else { this .copy( fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias), - bucket = request.bucketNames.get(identifierName).orElse(bucket) + bucket = request.bucketNames.get(identifierName).orElse(bucket), + bucketPath = bucketPath ) .withFunctions(this.updateFunctions(request)) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala 
b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala index 4f94e548..cb5f81b8 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala @@ -87,7 +87,9 @@ case class Bucket( lazy val nestedBucket: Option[String] = identifier.nestedElement.map(_.innerHitsName) - lazy val name: String = identifier.fieldAlias.getOrElse(sourceBucket.replace(".", "_")) + lazy val name: String = identifier.fieldAlias.getOrElse(path) + + lazy val path: String = sourceBucket.replace(".", "_") lazy val nestedPath: String = { identifier.nestedElement match { @@ -115,6 +117,20 @@ case class Bucket( identifier.painless(context) } +case class BucketPath(buckets: Seq[Bucket]) { + + lazy val path: String = + buckets.foldLeft("")((acc, b) => { + if (acc.isEmpty) { + s"${b.path}" + } else { + s"$acc>${b.path}" + } + }) + + override def toString: String = path +} + object MetricSelectorScript { def metricSelector(expr: Criteria): String = expr match { @@ -166,3 +182,189 @@ object MetricSelectorScript { } case class BucketIncludesExcludes(values: Set[String] = Set.empty, regex: Option[String] = None) + +/** Tree structure representing buckets and their hierarchy */ +case class BucketNode( + bucket: Bucket, + children: Seq[BucketNode] = Seq.empty, + private val parent: Option[BucketNode] = + None // to track parent node in order to build bucket path +) { + def identifier: String = bucket.path + + def findNode(id: String): Option[BucketNode] = { + if (this.identifier == id) Some(this) + else children.flatMap(_.findNode(id)).headOption + } + + def depth: Int = 1 + (if (children.isEmpty) 0 else children.map(_.depth).max) + + // Check if the node is a leaf + def isLeaf: Boolean = children.isEmpty + + def bucketPath: String = { + parent match { + case Some(p) => s"${p.bucketPath}>$identifier" + case None => identifier + } + } +} + +case class BucketTree( + roots: Seq[BucketNode] = Seq.empty +) { + 
+ private def filterNode(predicate: BucketNode => Boolean, node: BucketNode): Seq[BucketNode] = { + if (predicate(node)) { + node +: node.children.flatMap(child => filterNode(predicate, child)) + } else { + node.children.flatMap(child => filterNode(predicate, child)) + } + } + + /** Filter the bucket trees based on a predicate + * @param predicate + * the predicate to filter the buckets + * @return + * trees of buckets that satisfy the predicate + */ + def filter(predicate: BucketNode => Boolean): Seq[Seq[BucketNode]] = { + roots.map(root => filterNode(predicate, root)) + } + + /** Filter the bucket trees based on a negated predicate + * @param predicate + * the predicate to filter the buckets + * @return + * trees of buckets that do not satisfy the predicate + */ + def filterNot(predicate: BucketNode => Boolean): Seq[Seq[BucketNode]] = { + roots.map(root => filterNode(node => !predicate(node), root)) + } + + /** Find a bucket node by its identifier + * + * @param id + * the identifier of the bucket + * @return + * an option of the bucket node + */ + def findNode(id: String): Option[BucketNode] = { + roots.flatMap(_.findNode(id)).headOption + } + + /** Find a bucket by its identifier + * + * @param id + * the identifier of the bucket + * @return + * an option of the bucket + */ + def find(id: String): Option[Bucket] = { + findNode(id).map(_.bucket) + } + + /** Get the total number of nodes in the bucket trees + * + * @return + * the total number of nodes + */ + def size: Int = roots.map(countNodes).sum + + private def countNodes(node: BucketNode): Int = { + 1 + node.children.map(countNodes).sum + } + + def maxDepth: Int = { + if (roots.isEmpty) 0 else roots.map(_.depth).max + } + + /** Get all bucket trees as sequences of nodes + * + * @return + * all node trees + */ + def allTrees: Seq[Seq[BucketNode]] = roots.flatMap(collectTrees) + + /** Get all bucket trees as sequences of buckets + * + * @return + * all bucket trees + */ + def allBuckets: Seq[Seq[Bucket]] = 
allTrees.map(_.map(_.bucket)) + + private def collectTrees(node: BucketNode): Seq[Seq[BucketNode]] = { + if (node.isLeaf) { + Seq(Seq(node)) + } else { + node.children.flatMap { child => + collectTrees(child).map(path => node +: path) + } + } + } + + override def toString: String = { + roots.flatMap(root => printNode(root, "", isLast = true)).mkString("\n") + } + + private def printNode( + node: BucketNode, + prefix: String, + isLast: Boolean, + acc: Seq[String] = Seq.empty + ): Seq[String] = { + val connector = if (isLast) "└── " else "├── " + + val childPrefix = prefix + (if (isLast) " " else "│ ") + + (acc :+ s"$prefix$connector${node.identifier} (path: ${node.bucketPath})") ++ node.children.zipWithIndex + .flatMap { case (child, idx) => + printNode(child, childPrefix, idx == node.children.size - 1) + } + } +} + +object BucketTree { + + def fromBuckets(buckets: Seq[Seq[Bucket]]): BucketTree = { + if (buckets.isEmpty || buckets.forall(_.isEmpty)) { + return BucketTree(Seq.empty) + } + + val validBuckets = buckets.filter(_.nonEmpty) + + // Group by root bucket path + val groupedByRoot = validBuckets.groupBy(_.head.path) + + val roots = groupedByRoot + .map { case (_, pathsWithSameRoot) => + buildNode(pathsWithSameRoot) + } + .toSeq + .sortBy(_.identifier) + + BucketTree(roots) + } + + private def buildNode(paths: Seq[Seq[Bucket]], parent: Option[BucketNode] = None): BucketNode = { + val currentBucket = paths.head.head + + val childPaths = paths + .filter(_.size > 1) + .map(_.tail) + + val node = BucketNode(currentBucket, parent = parent) + + val children = if (childPaths.isEmpty) { + Seq.empty + } else { + childPaths + .groupBy(_.head.path) + .map { case (_, childPathGroup) => buildNode(childPathGroup, Some(node)) } + .toSeq + .sortBy(_.identifier) + } + + BucketNode(currentBucket, children, parent) + } +} diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala 
b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala index 4e925ac6..069aa5eb 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala @@ -122,10 +122,11 @@ case class SQLSearchRequest( } lazy val fields: Seq[String] = { - if (buckets.isEmpty) + if (groupBy.isEmpty && !windowFunctions.exists(_.isWindowing)) select.fields .filterNot(_.isScriptField) .filterNot(_.nested) + .filterNot(_.isAggregation) .map(_.sourceField) .filterNot(f => excludes.contains(f)) .distinct @@ -133,14 +134,14 @@ case class SQLSearchRequest( Seq.empty } - lazy val windowFields: Seq[Field] = select.fields.filter(_.isWindow) + lazy val windowFields: Seq[Field] = select.fields.filter(_.identifier.hasWindow) - lazy val windowFunctions: Seq[WindowFunction] = windowFields.flatMap(_.windows) + lazy val windowFunctions: Seq[WindowFunction] = windowFields.flatMap(_.identifier.windows) lazy val aggregates: Seq[Field] = select.fields .filter(f => f.isAggregation || f.isBucketScript) - .filterNot(_.isWindow) ++ windowFields + .filterNot(_.identifier.hasWindow) ++ windowFields lazy val sqlAggregations: Map[String, SQLAggregation] = aggregates.flatMap(f => SQLAggregation.fromField(f, this)).map(a => a.aggName -> a).toMap @@ -149,18 +150,13 @@ case class SQLSearchRequest( lazy val sources: Seq[String] = from.tables.map(_.name) - lazy val windowBuckets: Seq[Bucket] = windowFunctions - .flatMap(_.bucketNames) - .filterNot(bucket => - groupBy.map(_.bucketNames).getOrElse(Map.empty).keys.toSeq.contains(bucket._1) + lazy val bucketTree: BucketTree = BucketTree.fromBuckets( + Seq(groupBy.map(_.buckets).getOrElse(Seq.empty)) ++ windowFunctions.map( + _.buckets ) - .toMap - .values - .groupBy(_.identifier.aliasOrName) - .map(_._2.head) - .toSeq + ) - lazy val buckets: Seq[Bucket] = groupBy.map(_.buckets).getOrElse(Seq.empty) ++ windowBuckets + lazy val buckets: Seq[Bucket] = 
bucketTree.allBuckets.flatten override def validate(): Either[String, Unit] = { for { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index 526e8b4f..8dcbebe8 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -70,13 +70,8 @@ case class Field( override def functions: List[Function] = identifier.functions - lazy val windows: Option[WindowFunction] = - functions.collectFirst { case th: WindowFunction => th } - - def isWindow: Boolean = windows.nonEmpty //.exists(_.partitionBy.nonEmpty) - def update(request: SQLSearchRequest): Field = { - windows match { + identifier.windows match { case Some(th) => val windowFunction = th.update(request) val identifier = windowFunction.identifier @@ -144,7 +139,8 @@ case class SQLAggregation( aggType: AggregateFunction, direction: Option[SortOrder] = None, nestedElement: Option[NestedElement] = None, - buckets: Seq[String] = Seq.empty + buckets: Seq[String] = Seq.empty, + bucketPath: String = "" ) { val nested: Boolean = nestedElement.nonEmpty val multivalued: Boolean = aggType.multivalued @@ -225,6 +221,12 @@ object SQLAggregation { aggPath ++= Seq(aggName) } + val bucketPath = + aggType.bucketPath match { + case paths if paths.isEmpty => identifier.bucketPath + case other => other + } + Some( SQLAggregation( aggPath.mkString("."), @@ -234,7 +236,8 @@ object SQLAggregation { aggType = aggType, direction = direction, nestedElement = identifier.nestedElement, - buckets = request.buckets.map { _.name } + buckets = request.buckets.map { _.name }, + bucketPath = bucketPath ) ) } From ab72700bfe0579659f5853abb7a7b9f7724ba3ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Fri, 28 Nov 2025 16:15:11 +0100 Subject: [PATCH 25/40] update specifications for top hits aggregation, update to v 0.14.0 --- 
.../app/softnetwork/elastic/sql/SQLQuerySpec.scala | 12 ++++++------ build.sbt | 2 +- .../app/softnetwork/elastic/sql/SQLQuerySpec.scala | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index f0a07bd1..9dd644e0 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -2752,24 +2752,24 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val query = select.query println(query) query shouldBe - """{ + """{ | "query": { | "match_all": {} | }, | "size": 0, | "_source": false, | "aggs": { + | "cnt": { + | "cardinality": { + | "field": "salary" + | } + | }, | "dept": { | "terms": { | "field": "department", | "min_doc_count": 1 | }, | "aggs": { - | "cnt": { - | "cardinality": { - | "field": "salary" - | } - | }, | "first_salary": { | "top_hits": { | "size": 1, diff --git a/build.sbt b/build.sbt index 862d0c72..b5877a91 100644 --- a/build.sbt +++ b/build.sbt @@ -19,7 +19,7 @@ ThisBuild / organization := "app.softnetwork" name := "softclient4es" -ThisBuild / version := "0.13.1" +ThisBuild / version := "0.14.0" ThisBuild / scalaVersion := scala213 diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 133cde4c..23869a7b 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -2759,17 +2759,17 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "size": 0, | "_source": false, | "aggs": { + | "cnt": { + | "cardinality": { + | "field": "salary" + | } + | }, | "dept": { | "terms": { | "field": "department", | "min_doc_count": 1 | }, | "aggs": { - | "cnt": { - | "cardinality": { 
- | "field": "salary" - | } - | }, | "first_salary": { | "top_hits": { | "size": 1, From 422e221a952019df6586a30e40046f0cc1000f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Fri, 28 Nov 2025 16:20:33 +0100 Subject: [PATCH 26/40] fix lint --- .../test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 9dd644e0..dd0f26cf 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -2752,7 +2752,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val query = select.query println(query) query shouldBe - """{ + """{ | "query": { | "match_all": {} | }, From b9d784e1e844d1bfe1162193ad8315cf4873b0e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Fri, 28 Nov 2025 17:50:45 +0100 Subject: [PATCH 27/40] fix jest tests --- project/SoftClient4es.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/project/SoftClient4es.scala b/project/SoftClient4es.scala index 994501b4..6dbe8e43 100644 --- a/project/SoftClient4es.scala +++ b/project/SoftClient4es.scala @@ -152,8 +152,9 @@ trait SoftClient4es { (elasticSearchMajorVersion(esVersion) match { case 6 => Seq( - "io.searchbox" % "jest" % Versions.jest - ).map(_.excludeAll((httpComponentsExclusions ++ Seq(guavaExclusion)) *)) + "io.searchbox" % "jest" % Versions.jest, + "com.google.guava" % "guava" % "33.5.0-jre", + ).map(_.excludeAll((httpComponentsExclusions /*++ Seq(guavaExclusion)*/) *)) case _ => Seq.empty }) } From fe9cf7f9149b03c6ada1a8a63ac6e249ea759d7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Fri, 28 Nov 2025 18:04:54 +0100 Subject: [PATCH 28/40] update README.md --- README.md | 10 +-- 
.../elastic/sql/SQLQuerySpec.scala | 80 ++++++++++++++++--- project/SoftClient4es.scala | 4 +- 3 files changed, 75 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index cfbc96a2..ffeed6cd 100644 --- a/README.md +++ b/README.md @@ -787,18 +787,18 @@ ThisBuild / resolvers ++= Seq( // For Elasticsearch 6 // Using Jest client -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es6-jest-client" % 0.13.1 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es6-jest-client" % 0.14.0 // Or using Rest High Level client -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es6-rest-client" % 0.13.1 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es6-rest-client" % 0.14.0 // For Elasticsearch 7 -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es7-rest-client" % 0.13.1 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es7-rest-client" % 0.14.0 // For Elasticsearch 8 -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es8-java-client" % 0.13.1 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es8-java-client" % 0.14.0 // For Elasticsearch 9 -libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es9-java-client" % 0.13.1 +libraryDependencies += "app.softnetwork.elastic" %% s"softclient4es9-java-client" % 0.14.0 ``` ### **Quick Example** diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 23869a7b..e741bc9e 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -3738,19 +3738,75 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { } it should "test" in { - val query = + /*val query = """SELECT - | category, - | SUM(amount) AS totalSales, - | COUNT(*) AS orderCount, - | DATE_TRUNC(sales_date, MONTH) as 
salesMonth - | FROM orders - | GROUP BY DATE_TRUNC(sales_date, MONTH), category - | ORDER BY DATE_TRUNC(sales_date, MONTH) DESC, category ASC""".stripMargin.replaceAll( - "\n", - " " - ) + | category, + | SUM(amount) AS totalSales, + | COUNT(*) AS orderCount, + | DATE_TRUNC(sales_date, MONTH) as salesMonth, + | YEAR(sales_date) as salesYear + |FROM orders + |WHERE sales_date IS NOT NULL AND sales_date BETWEEN '2024-01-01' AND '2024-12-31' AND category IN ('Electronics', 'Books', 'Clothing') + |GROUP BY YEAR(sales_date), DATE_TRUNC(sales_date, MONTH), category + |ORDER BY YEAR(sales_date) DESC, DATE_TRUNC(sales_date, MONTH) DESC, category ASC""".stripMargin + .replaceAll( + "\n", + " " + ) val select: ElasticSearchRequest = SQLQuery(query) - println(select.query) + println(select.query)*/ + val query = + """ SELECT + | product_id AS productId, + | product_name AS productName, + | DATE_TRUNC( sale_date, MONTH ) AS saleMonth, + | SUM(amount) AS monthlySales, + | FIRST_VALUE(amount) OVER ( + | PARTITION BY product_id, DATE_TRUNC( sale_date, MONTH ) + | ORDER BY sale_date ASC + | ) AS launchMonthSales, + | LAST_VALUE(amount) OVER ( + | PARTITION BY product_id, DATE_TRUNC( sale_date, MONTH ) + | ORDER BY sale_date ASC + | ) AS peakMonthSales, + | ARRAY_AGG(amount) OVER ( + | PARTITION BY product_id + | ORDER BY sale_date ASC + | ) AS allMonthlySales + | FROM sales + | WHERE sale_date >= '2024-01-01' + | GROUP BY product_id, product_name, DATE_TRUNC( sale_date, MONTH ) + | ORDER BY product_id, DATE_TRUNC( sale_date, MONTH ) + |""".stripMargin + SQLQuery(query).request.flatMap(_.left.toOption) match { + case Some(request) => + val aggRequest = + request + .copy( + select = request.select.copy(fields = request.windowFields), + groupBy = None, //request.groupBy.map(_.copy(buckets = request.windowBuckets)), + orderBy = None, // Not needed for aggregations + limit = None // Need all buckets + ) + .update() + + val windowRequest: ElasticSearchRequest = aggRequest + 
println(windowRequest.query) + + // Remove window function fields from SELECT + val baseFields = request.select.fields.filterNot(_.identifier.hasWindow) + + // Create modified request + val baseRequest: ElasticSearchRequest = request + .copy( + select = request.select.copy(fields = baseFields) + ) + .update() + + println(baseRequest.query) + + case _ => + + } } } diff --git a/project/SoftClient4es.scala b/project/SoftClient4es.scala index 6dbe8e43..ac608802 100644 --- a/project/SoftClient4es.scala +++ b/project/SoftClient4es.scala @@ -153,8 +153,8 @@ trait SoftClient4es { case 6 => Seq( "io.searchbox" % "jest" % Versions.jest, - "com.google.guava" % "guava" % "33.5.0-jre", - ).map(_.excludeAll((httpComponentsExclusions /*++ Seq(guavaExclusion)*/) *)) + "com.google.guava" % "guava" % "33.5.0-jre" + ).map(_.excludeAll(httpComponentsExclusions /*++ Seq(guavaExclusion)*/ *)) case _ => Seq.empty }) } From 1097401d1cd8e79d1545cdda0b5e318b4854c67e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Sun, 30 Nov 2025 05:38:52 +0100 Subject: [PATCH 29/40] finalize support for window functions with multi-partitioning --- .../elastic/client/ElasticConversion.scala | 4 +- .../softnetwork/elastic/client/package.scala | 2 + .../client/ElasticConversionSpec.scala | 408 ++++++++++++++++++ .../elastic/client/WindowFunctionSpec.scala | 277 ++++++++++++ .../elastic/model/window/package.scala | 47 ++ 5 files changed, 737 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala index 7709f2ca..fc71bedb 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala @@ -369,6 +369,8 @@ trait ElasticConversion { // Handle each aggregation with buckets bucketAggs.flatMap { case (aggName, buckets, _) => buckets.flatMap { bucket => + 
val allTopHits = extractAllTopHits(bucket, fieldAliases, aggregations) + val bucketKey = extractBucketKey(bucket) val docCount = Option(bucket.get("doc_count")) .map(_.asLong()) @@ -377,7 +379,7 @@ trait ElasticConversion { val currentContext = parentContext ++ Map( aggName -> bucketKey, s"${aggName}_doc_count" -> docCount - ) + ) ++ allTopHits // Check for sub-aggregations val subAggFields = bucket diff --git a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index 3cf886f3..0b44e37c 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -162,6 +162,8 @@ package object client extends SerializationApi { * - the source field of the aggregation * @param windowing * - whether the aggregation is a window function with partitioning + * @param bucketPath + * - the bucket path for pipeline aggregations */ case class ClientAggregation( aggName: String, diff --git a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala index ea04c8df..45355a45 100644 --- a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala +++ b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala @@ -658,6 +658,407 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver throw error } } + + it should "parse window results with distinct partitions" in { + val results = + """ + { + "took" : 71, + "timed_out" : false, + "_shards" : { + "failed" : 0.0, + "successful" : 1.0, + "total" : 1.0, + "skipped" : 0.0 + }, + "hits" : { + "total" : { + "relation" : "eq", + "value" : 13 + }, + "hits" : [ ], + "max_score" : null + }, + "aggregations" : { + "sterms#department" : { + "buckets" : [ { + "top_hits#last_in_dept" : { + "hits" : { + "total" : { + 
"relation" : "eq", + "value" : 7 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_5", + "_score" : null, + "_source" : { + "salary" : 75000 + }, + "sort" : [ 1613088000000 ] + } ], + "max_score" : null + } + }, + "top_hits#first_in_dept" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 7 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_19", + "_score" : null, + "_source" : { + "salary" : 130000 + }, + "sort" : [ 1433116800000 ] + } ], + "max_score" : null + } + }, + "sterms#location" : { + "buckets" : [ { + "top_hits#last_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_5", + "_score" : null, + "_source" : { + "salary" : 75000 + }, + "sort" : [ 1613088000000 ] + } ], + "max_score" : null + } + }, + "top_hits#first_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_2", + "_score" : null, + "_source" : { + "salary" : 120000 + }, + "sort" : [ 1515542400000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 3, + "key" : "New York" + }, { + "top_hits#last_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_3", + "_score" : null, + "_source" : { + "salary" : 85000 + }, + "sort" : [ 1592611200000 ] + } ], + "max_score" : null + } + }, + "top_hits#first_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_19", + "_score" : null, + "_source" : { + "salary" : 130000 + }, + "sort" : [ 1433116800000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 3, + "key" : "San Francisco" + }, { + "top_hits#last_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 1 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_16", + "_score" : null, + "_source" : { + "salary" : 105000 + }, + "sort" : [ 
1460246400000 ] + } ], + "max_score" : null + } + }, + "top_hits#first_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 1 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_16", + "_score" : null, + "_source" : { + "salary" : 105000 + }, + "sort" : [ 1460246400000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 1, + "key" : "Remote" + } ], + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0 + }, + "doc_count" : 7, + "key" : "Engineering" + }, { + "top_hits#last_in_dept" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 6 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_20", + "_score" : null, + "_source" : { + "salary" : 75000 + }, + "sort" : [ 1615766400000 ] + } ], + "max_score" : null + } + }, + "top_hits#first_in_dept" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 6 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_9", + "_score" : null, + "_source" : { + "salary" : 95000 + }, + "sort" : [ 1488931200000 ] + } ], + "max_score" : null + } + }, + "sterms#location" : { + "buckets" : [ { + "top_hits#last_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_20", + "_score" : null, + "_source" : { + "salary" : 75000 + }, + "sort" : [ 1615766400000 ] + } ], + "max_score" : null + } + }, + "top_hits#first_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_7", + "_score" : null, + "_source" : { + "salary" : 90000 + }, + "sort" : [ 1543536000000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 3, + "key" : "Chicago" + }, { + "top_hits#last_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 2 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_6", + "_score" : null, + "_source" : { + "salary" : 80000 + }, + "sort" : [ 1563753600000 ] + } ], + "max_score" : null + } + }, + 
"top_hits#first_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 2 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_9", + "_score" : null, + "_source" : { + "salary" : 95000 + }, + "sort" : [ 1488931200000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 2, + "key" : "New York" + }, { + "top_hits#last_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 1 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_17", + "_score" : null, + "_source" : { + "salary" : 92000 + }, + "sort" : [ 1551312000000 ] + } ], + "max_score" : null + } + }, + "top_hits#first_in_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 1 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_17", + "_score" : null, + "_source" : { + "salary" : 92000 + }, + "sort" : [ 1551312000000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 1, + "key" : "Remote" + } ], + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0 + }, + "doc_count" : 6, + "key" : "Sales" + } ], + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0 + } + } + }""" + val aggregations = + Map( + "first_in_dept_loc" -> ClientAggregation( + aggName = "first_in_dept_loc", + aggType = AggregationType.FirstValue, + distinct = false, + "salary", + windowing = true, + bucketPath = "department>location" + ), + "last_in_dept_loc" -> ClientAggregation( + aggName = "last_in_dept_loc", + aggType = AggregationType.LastValue, + distinct = false, + "salary", + windowing = true, + bucketPath = "department>location" + ), + "first_in_dept" -> ClientAggregation( + aggName = "first_in_dept", + aggType = AggregationType.FirstValue, + distinct = false, + "salary", + windowing = true, + bucketPath = "department" + ), + "last_in_dept" -> ClientAggregation( + aggName = "last_in_dept", + aggType = AggregationType.LastValue, + distinct = false, + "salary", + windowing = true, + bucketPath = "department" + ) + ) + parseResponse( + results, + 
Map.empty, + aggregations + ) match { + case Success(results) => + val rows = results.map(row => extractAggregationValues(row, aggregations)) + rows.foreach(println) + val windows = rows.map(row => convertTo[EmployeeDistinctPartitions](row)) + windows.foreach(println) + windows.size shouldBe 6 + case Failure(error) => + throw error + } + } } case class Products(category: String, top_products: List[Product], avg_price: Double) @@ -693,3 +1094,10 @@ case class User( name: String, address: Address ) + +case class EmployeeDistinctPartitions( + first_in_dept_loc: Option[Int] = None, + last_in_dept_loc: Option[Int] = None, + first_in_dept: Option[Int] = None, + last_in_dept: Option[Int] = None +) diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala index e2e1d87e..9b0ca5d9 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala @@ -1270,4 +1270,281 @@ trait WindowFunctionSpec emp.department should not be empty } } + + // ======================================================================== + // Multi partitioning integration tests + // ======================================================================== + + private def checkEmployeesWithDistinctPartitions( + employees: Seq[EmployeeDistinctPartitions] + ): Unit = { + employees should not be empty + log.info(s"\n=== Testing ${employees.size} employees with 2 distinct window partitions ===") + + // Verify that all values are present + employees.foreach { emp => + emp.first_in_dept_loc shouldBe defined + emp.last_in_dept_loc shouldBe defined + emp.first_in_dept shouldBe defined + emp.last_in_dept shouldBe defined + } + + // Display data for debugging + log.info("\n--- Raw Data ---") + employees.sortBy(e => (e.department, e.location, e.hire_date)).foreach { emp => + log.info( + 
f"${emp.department}%-12s ${emp.location}%-15s ${emp.name}%-20s " + + f"salary=${emp.salary}%6d hire=${emp.hire_date} " + + f"| first_loc=${emp.first_in_dept_loc.get}%6d last_loc=${emp.last_in_dept_loc.get}%6d " + + f"first_dept=${emp.first_in_dept.get}%6d last_dept=${emp.last_in_dept.get}%6d" + ) + } + + // ============================================================ + // Test Window 1: PARTITION BY (department, location) + // ============================================================ + log.info("\n--- Window 1: PARTITION BY (department, location) ---") + val byDeptLoc = employees.groupBy(e => (e.department, e.location)) + + byDeptLoc.foreach { case ((dept, loc), emps) => + val sortedByHireDate = emps.sortBy(_.hire_date) + + // Tous les employés de cette partition doivent avoir le même FIRST_VALUE et LAST_VALUE + val firstValuesLoc = emps.flatMap(_.first_in_dept_loc).distinct + val lastValuesLoc = emps.flatMap(_.last_in_dept_loc).distinct + + withClue( + s"Partition ($dept, $loc) should have exactly 1 distinct first_in_dept_loc value\n" + ) { + firstValuesLoc should have size 1 + } + + withClue(s"Partition ($dept, $loc) should have exactly 1 distinct last_in_dept_loc value\n") { + lastValuesLoc should have size 1 + } + + val expectedFirstLoc = sortedByHireDate.head.salary + val expectedLastLoc = sortedByHireDate.last.salary + + withClue( + s"Partition ($dept, $loc)\n" + + s" Expected first: ${sortedByHireDate.head.name} hired on ${sortedByHireDate.head.hire_date} with salary $expectedFirstLoc\n" + + s" Actual first_in_dept_loc: ${firstValuesLoc.head}\n" + ) { + firstValuesLoc.head shouldBe expectedFirstLoc + } + + withClue( + s"Partition ($dept, $loc)\n" + + s" Expected last: ${sortedByHireDate.last.name} hired on ${sortedByHireDate.last.hire_date} with salary $expectedLastLoc\n" + + s" Actual last_in_dept_loc: ${lastValuesLoc.head}\n" + ) { + lastValuesLoc.head shouldBe expectedLastLoc + } + + log.info(f" ✓ ($dept%-12s, $loc%-15s): ${emps.size} emps") + 
log.info( + f" FIRST_LOC=$expectedFirstLoc%6d (${sortedByHireDate.head.name}%-20s ${sortedByHireDate.head.hire_date})" + ) + log.info( + f" LAST_LOC =$expectedLastLoc%6d (${sortedByHireDate.last.name}%-20s ${sortedByHireDate.last.hire_date})" + ) + } + + // ============================================================ + // Test Window 2: PARTITION BY (department) + // ============================================================ + log.info("\n--- Window 2: PARTITION BY (department) ---") + val byDept = employees.groupBy(_.department) + + byDept.foreach { case (dept, emps) => + val sortedByHireDate = emps.sortBy(_.hire_date) + + val firstValuesDept = emps.flatMap(_.first_in_dept).distinct + val lastValuesDept = emps.flatMap(_.last_in_dept).distinct + + withClue(s"Department $dept should have exactly 1 distinct first_in_dept value\n") { + firstValuesDept should have size 1 + } + + withClue(s"Department $dept should have exactly 1 distinct last_in_dept value\n") { + lastValuesDept should have size 1 + } + + val expectedFirstDept = sortedByHireDate.head.salary + val expectedLastDept = sortedByHireDate.last.salary + + withClue( + s"Department $dept\n" + + s" Expected first: ${sortedByHireDate.head.name} hired on ${sortedByHireDate.head.hire_date} with salary $expectedFirstDept\n" + + s" Actual first_in_dept: ${firstValuesDept.head}\n" + ) { + firstValuesDept.head shouldBe expectedFirstDept + } + + withClue( + s"Department $dept\n" + + s" Expected last: ${sortedByHireDate.last.name} hired on ${sortedByHireDate.last.hire_date} with salary $expectedLastDept\n" + + s" Actual last_in_dept: ${lastValuesDept.head}\n" + ) { + lastValuesDept.head shouldBe expectedLastDept + } + + log.info(f" ✓ $dept%-12s: ${emps.size} emps") + log.info( + f" FIRST_DEPT=$expectedFirstDept%6d (${sortedByHireDate.head.name}%-20s ${sortedByHireDate.head.hire_date})" + ) + log.info( + f" LAST_DEPT =$expectedLastDept%6d (${sortedByHireDate.last.name}%-20s ${sortedByHireDate.last.hire_date})" + ) + } + + 
// ============================================================ + // Checking the consistency between the two partitions + // ============================================================ + log.info("\n--- Verifying distinct partition results ---") + + byDept.foreach { case (dept, deptEmps) => + val deptLocations = deptEmps.map(_.location).distinct.sorted + + log.info(s"\n$dept has ${deptLocations.size} locations: ${deptLocations.mkString(", ")}") + + val deptFirstSalary = deptEmps.flatMap(_.first_in_dept).distinct.head + val deptLastSalary = deptEmps.flatMap(_.last_in_dept).distinct.head + val deptFirstHire = deptEmps.minBy(_.hire_date) + val deptLastHire = deptEmps.maxBy(_.hire_date) + + log.info(f" Department-level window:") + log.info( + f" FIRST = $deptFirstSalary%6d (${deptFirstHire.name} @ ${deptFirstHire.location}, ${deptFirstHire.hire_date})" + ) + log.info( + f" LAST = $deptLastSalary%6d (${deptLastHire.name} @ ${deptLastHire.location}, ${deptLastHire.hire_date})" + ) + + deptLocations.foreach { loc => + val locEmps = deptEmps.filter(_.location == loc) + val locFirstSalary = locEmps.flatMap(_.first_in_dept_loc).distinct.head + val locLastSalary = locEmps.flatMap(_.last_in_dept_loc).distinct.head + val locFirstHire = locEmps.minBy(_.hire_date) + val locLastHire = locEmps.maxBy(_.hire_date) + + log.info(f" Location-level window ($loc):") + log.info( + f" FIRST = $locFirstSalary%6d (${locFirstHire.name}, ${locFirstHire.hire_date})" + ) + log.info(f" LAST = $locLastSalary%6d (${locLastHire.name}, ${locLastHire.hire_date})") + + // Logical check : the hiring date at the departmental level + // must be <= on all dates at the level (department, location) + withClue( + s"$dept: First hire date at dept level (${deptFirstHire.hire_date}) " + + s"should be <= first hire at ($dept, $loc) level (${locFirstHire.hire_date})\n" + ) { + deptFirstHire.hire_date should be <= locFirstHire.hire_date + } + + withClue( + s"$dept: Last hire date at dept level 
(${deptLastHire.hire_date}) " + + s"should be >= last hire at ($dept, $loc) level (${locLastHire.hire_date})\n" + ) { + deptLastHire.hire_date should be >= locLastHire.hire_date + } + } + + if (deptLocations.size > 1) { + log.info(s" ✓ Different partitions produce different results (as expected)") + } else { + log.info(s" ℹ Single location - partition results are identical") + } + } + } + + "Search API with distinct window partitions" should "compute FIRST_VALUE and LAST_VALUE on different partitions" in { + val results = client.searchAs[EmployeeDistinctPartitions](""" + SELECT + department, + location, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS first_in_dept_loc, + LAST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS last_in_dept_loc, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_in_dept, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_in_dept + FROM emp + WHERE department IN ('Engineering', 'Sales') + ORDER BY department, location, hire_date + LIMIT 20 + """) + + results match { + case ElasticSuccess(employees) => + checkEmployeesWithDistinctPartitions(employees) + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + + "Scroll API with distinct window partitions" should "compute correctly with streaming and different partitions" in { + val config = ScrollConfig(scrollSize = 5, logEvery = 5) + val startTime = System.currentTimeMillis() + + val futureResults = client + .scrollAs[EmployeeDistinctPartitions]( + """ + SELECT + department, + location, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS first_in_dept_loc, + LAST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS last_in_dept_loc, + 
FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_in_dept, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_in_dept + FROM emp + ORDER BY department, location, hire_date + LIMIT 20 + """, + config + ) + .runWith(Sink.seq) + + futureResults await { value => + val duration = System.currentTimeMillis() - startTime + val results = value.map(_._1) + checkEmployeesWithDistinctPartitions(results) + duration should be < 1000L + } match { + case scala.util.Success(_) => // OK + case scala.util.Failure(ex) => fail(s"Scroll failed: ${ex.getMessage}") + } + } + } diff --git a/testkit/src/main/scala/app/softnetwork/elastic/model/window/package.scala b/testkit/src/main/scala/app/softnetwork/elastic/model/window/package.scala index 1074e96b..6ac4dac7 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/model/window/package.scala +++ b/testkit/src/main/scala/app/softnetwork/elastic/model/window/package.scala @@ -83,4 +83,51 @@ package object window { hire_date: String, first_in_dept_loc: Option[Int] = None ) + + case class EmployeeDistinctPartitions( + department: String, + location: String, + name: String, + salary: Int, + hire_date: String, + // Window 1: PARTITION BY department, location + first_in_dept_loc: Option[Int] = None, + last_in_dept_loc: Option[Int] = None, + // Window 2: PARTITION BY department (différent!) 
+ first_in_dept: Option[Int] = None, + last_in_dept: Option[Int] = None + ) + + case class EmployeeMultiWindowPartitions( + department: String, + location: String, + level: String, + name: String, + salary: Int, + hire_date: String, + // Window 1: PARTITION BY department, location + first_salary_dept_loc: Option[Int] = None, + // Window 2: PARTITION BY department + first_salary_dept: Option[Int] = None, + // Window 3: PARTITION BY level + avg_salary_level: Option[Double] = None + ) + + case class EmployeeComplexWindows( + department: String, + location: String, + name: String, + salary: Int, + hire_date: String, + level: String, + // Window 1: PARTITION BY department, location ORDER BY hire_date + first_hire_dept_loc: Option[String] = None, + // Window 2: PARTITION BY department ORDER BY salary DESC + top_earner_dept: Option[String] = None, + // Window 3: PARTITION BY location ORDER BY hire_date + first_hire_location: Option[String] = None, + // Window 4: Global (no partition) + global_rank: Option[Int] = None + ) + } From 4d49bad507ffd30d8bddf0f28b401bf47e4ba0dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Sun, 30 Nov 2025 05:51:55 +0100 Subject: [PATCH 30/40] add specifications for window functions --- .../elastic/client/WindowFunctionSpec.scala | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala index 9b0ca5d9..243c19e8 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala @@ -1547,4 +1547,121 @@ trait WindowFunctionSpec } } + "Search and Scroll APIs with distinct window partitions" should "maintain consistency between them" in { + // Search + val searchResults = client.searchAs[EmployeeDistinctPartitions](""" + SELECT + department, + 
location, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS first_in_dept_loc, + LAST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS last_in_dept_loc, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_in_dept, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_in_dept + FROM emp + WHERE department IN ('Engineering', 'Sales') + LIMIT 20 + """) match { + case ElasticSuccess(emps) => emps + case ElasticFailure(error) => fail(s"Search failed: ${error.message}") + } + + // Scroll + val config = ScrollConfig(scrollSize = 4) + val futureScrollResults = client + .scrollAs[EmployeeDistinctPartitions]( + """ + SELECT + department, + location, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS first_in_dept_loc, + LAST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS last_in_dept_loc, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_in_dept, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_in_dept + FROM emp + WHERE department IN ('Engineering', 'Sales') + LIMIT 20 + """, + config + ) + .runWith(Sink.seq) + + val scrollResults = Await.result(futureScrollResults, 30.seconds).map(_._1) + + log.info(s"\n=== Comparing Search vs Scroll for distinct partitions ===") + log.info(s" Search: ${searchResults.size} results") + log.info(s" Scroll: ${scrollResults.size} results") + + searchResults.size shouldBe scrollResults.size + + // Compare Window 1: (dept, loc) + val searchByDeptLoc = searchResults.groupBy(e => (e.department, e.location)) + val scrollByDeptLoc = scrollResults.groupBy(e => (e.department, e.location)) + + searchByDeptLoc.keys shouldBe scrollByDeptLoc.keys + + 
log.info("\n--- Window 1: PARTITION BY (department, location) ---") + searchByDeptLoc.foreach { case (key @ (dept, loc), searchEmps) => + val scrollEmps = scrollByDeptLoc(key) + + val searchFirst = searchEmps.flatMap(_.first_in_dept_loc).distinct.head + val scrollFirst = scrollEmps.flatMap(_.first_in_dept_loc).distinct.head + val searchLast = searchEmps.flatMap(_.last_in_dept_loc).distinct.head + val scrollLast = scrollEmps.flatMap(_.last_in_dept_loc).distinct.head + + searchFirst shouldBe scrollFirst + searchLast shouldBe scrollLast + + log.info(s" ✓ ($dept, $loc): FIRST=$searchFirst, LAST=$searchLast (consistent)") + } + + // Compare Window 2: (dept) + val searchByDept = searchResults.groupBy(_.department) + val scrollByDept = scrollResults.groupBy(_.department) + + log.info("\n--- Window 2: PARTITION BY (department) ---") + searchByDept.foreach { case (dept, searchEmps) => + val scrollEmps = scrollByDept(dept) + + val searchFirst = searchEmps.flatMap(_.first_in_dept).distinct.head + val scrollFirst = scrollEmps.flatMap(_.first_in_dept).distinct.head + val searchLast = searchEmps.flatMap(_.last_in_dept).distinct.head + val scrollLast = scrollEmps.flatMap(_.last_in_dept).distinct.head + + searchFirst shouldBe scrollFirst + searchLast shouldBe scrollLast + + log.info(s" ✓ $dept: FIRST=$searchFirst, LAST=$searchLast (consistent)") + } + } + } From 95338435b117b010c102e9f43f5c8c5841954ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Sun, 30 Nov 2025 09:55:56 +0100 Subject: [PATCH 31/40] add support for count, min, max, avg and sum with partitions --- .../sql/bridge/ElasticAggregation.scala | 105 ++-- .../elastic/client/ElasticConversion.scala | 160 +++--- .../softnetwork/elastic/client/package.scala | 11 +- .../client/ElasticConversionSpec.scala | 465 +++++++++++++++++- .../sql/bridge/ElasticAggregation.scala | 105 ++-- .../sql/macros/SQLQueryValidator.scala | 14 +- .../sql/function/aggregate/package.scala | 158 +++++- 
.../parser/function/aggregate/package.scala | 69 ++- .../elastic/sql/query/GroupBy.scala | 7 + .../elastic/sql/query/Select.scala | 7 +- .../elastic/client/WindowFunctionSpec.scala | 138 ++++++ 11 files changed, 1060 insertions(+), 179 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 5ba6de78..571a7c89 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -181,46 +181,73 @@ object ElasticAggregation { case AVG => aggWithFieldOrScript(avgAgg, (name, s) => avgAgg(name, sourceField).script(s)) case SUM => aggWithFieldOrScript(sumAgg, (name, s) => sumAgg(name, sourceField).script(s)) case th: WindowFunction => - val limit = { - th match { - case _: LastValue | _: FirstValue => Some(1) - case _ => th.limit.map(_.limit) - } - } - val topHits = - topHitsAgg(aggName) - .fetchSource( - th.identifier.name +: th.fields - .filterNot(_.isScriptField) - .filterNot(_.sourceField == th.identifier.name) - .map(_.sourceField) - .distinct - .toArray, - Array.empty - ) - .copy( - scripts = th.fields - .filter(_.isScriptField) - .groupBy(_.sourceField) - .map(_._2.head) - .map(f => f.sourceField -> Script(f.painless(None)).lang("painless")) - .toMap, - size = limit - ) sortBy th.orderBy.sorts.map(sort => - sort.order match { - case Some(Desc) => - th.window match { - case LAST_VALUE => FieldSort(sort.field.name).asc() - case _ => FieldSort(sort.field.name).desc() - } - case _ => - th.window match { - case LAST_VALUE => FieldSort(sort.field.name).desc() - case _ => FieldSort(sort.field.name).asc() - } + th.window match { + case COUNT => + val field = + sourceField match { + case "*" | "_id" | "_index" | "_type" => "_index" + case _ => sourceField + } + if (distinct) + cardinalityAgg(aggName, field) + else { + 
valueCountAgg(aggName, field) } - ) - topHits + case MIN => + aggWithFieldOrScript(minAgg, (name, s) => minAgg(name, sourceField).script(s)) + case MAX => + aggWithFieldOrScript(maxAgg, (name, s) => maxAgg(name, sourceField).script(s)) + case AVG => + aggWithFieldOrScript(avgAgg, (name, s) => avgAgg(name, sourceField).script(s)) + case SUM => + aggWithFieldOrScript(sumAgg, (name, s) => sumAgg(name, sourceField).script(s)) + case _ => + val limit = { + th match { + case _: LastValue | _: FirstValue => Some(1) + case _ => th.limit.map(_.limit) + } + } + val topHits = + topHitsAgg(aggName) + .fetchSource( + th.identifier.name +: th.fields + .filterNot(_.isScriptField) + .filterNot(_.sourceField == th.identifier.name) + .map(_.sourceField) + .distinct + .toArray, + Array.empty + ) + .copy( + scripts = th.fields + .filter(_.isScriptField) + .groupBy(_.sourceField) + .map(_._2.head) + .map(f => f.sourceField -> Script(f.painless(None)).lang("painless")) + .toMap, + size = limit, + sorts = th.orderBy + .map( + _.sorts.map(sort => + sort.order match { + case Some(Desc) => + th.window match { + case LAST_VALUE => FieldSort(sort.field.name).asc() + case _ => FieldSort(sort.field.name).desc() + } + case _ => + th.window match { + case LAST_VALUE => FieldSort(sort.field.name).desc() + case _ => FieldSort(sort.field.name).asc() + } + } + ) + ) + .getOrElse(Seq.empty) + ) + topHits + } case script: BucketScriptAggregation => val params = allAggregations.get(aggName) match { case Some(sqlAgg) => diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala index fc71bedb..8ece3037 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala @@ -148,11 +148,27 @@ trait ElasticConversion { case (None, Some(aggs)) => // Case 2 : only aggregations - parseAggregations(aggs, 
Map.empty, fieldAliases, aggregations) + val ret = parseAggregations(aggs, Map.empty, fieldAliases, aggregations) + val groupedRows: Map[String, Seq[Map[String, Any]]] = + ret.groupBy(_.getOrElse("bucket_root", "").toString) + groupedRows.values.foldLeft(Seq(Map.empty[String, Any])) { (acc, group) => + for { + accMap <- acc + groupMap <- group + } yield accMap ++ groupMap + } case (Some(hits), Some(aggs)) if hits.isEmpty => // Case 3 : aggregations with no hits - parseAggregations(aggs, Map.empty, fieldAliases, aggregations) + val ret = parseAggregations(aggs, Map.empty, fieldAliases, aggregations) + val groupedRows: Map[String, Seq[Map[String, Any]]] = + ret.groupBy(_.getOrElse("bucket_root", "").toString) + groupedRows.values.foldLeft(Seq(Map.empty[String, Any])) { (acc, group) => + for { + accMap <- acc + groupMap <- group + } yield accMap ++ groupMap + } case (Some(hits), Some(aggs)) if hits.nonEmpty => // Case 4 : Hits + global aggregations + top_hits aggregations @@ -355,7 +371,7 @@ trait ElasticConversion { } } else if (bucketAggs.isEmpty) { // No buckets : it is a leaf aggregation (metrics or top_hits) - val metrics = extractMetrics(aggsNode) + val metrics = extractMetrics(aggsNode, aggregations) val allTopHits = extractAllTopHits(aggsNode, fieldAliases, aggregations) if (allTopHits.nonEmpty) { @@ -369,6 +385,7 @@ trait ElasticConversion { // Handle each aggregation with buckets bucketAggs.flatMap { case (aggName, buckets, _) => buckets.flatMap { bucket => + val metrics = extractMetrics(bucket, aggregations) val allTopHits = extractAllTopHits(bucket, fieldAliases, aggregations) val bucketKey = extractBucketKey(bucket) @@ -379,7 +396,7 @@ trait ElasticConversion { val currentContext = parentContext ++ Map( aggName -> bucketKey, s"${aggName}_doc_count" -> docCount - ) ++ allTopHits + ) ++ metrics ++ allTopHits // Check for sub-aggregations val subAggFields = bucket @@ -468,62 +485,76 @@ trait ElasticConversion { /** Extract metrics from an aggregation node */ 
- def extractMetrics(aggsNode: JsonNode): Map[String, Any] = { + def extractMetrics( + aggsNode: JsonNode, + aggregations: Map[String, ClientAggregation] + ): Map[String, Any] = { if (!aggsNode.isObject) return Map.empty - aggsNode - .properties() - .asScala - .flatMap { entry => - val name = normalizeAggregationKey(entry.getKey) - val value = entry.getValue - - // Detect simple metric values - Option(value.get("value")) - .filter(!_.isNull) - .map { metricValue => - val numericValue = if (metricValue.isIntegralNumber) { - metricValue.asLong() - } else if (metricValue.isFloatingPointNumber) { - metricValue.asDouble() - } else { - metricValue.asText() - } - name -> numericValue + var bucketRoot: Option[String] = None + val metrics = + aggsNode + .properties() + .asScala + .flatMap { entry => + val name = normalizeAggregationKey(entry.getKey) + aggregations.get(name) match { + case Some(agg) => + bucketRoot = Some(agg.bucketRoot) + case _ => } - .orElse { - // Stats aggregations - if (value.has("count") && value.has("sum") && value.has("avg")) { - Some( - name -> Map( - "count" -> value.get("count").asLong(), - "sum" -> Option(value.get("sum")).filterNot(_.isNull).map(_.asDouble()), - "avg" -> Option(value.get("avg")).filterNot(_.isNull).map(_.asDouble()), - "min" -> Option(value.get("min")).filterNot(_.isNull).map(_.asDouble()), - "max" -> Option(value.get("max")).filterNot(_.isNull).map(_.asDouble()) - ).collect { case (k, Some(v)) => k -> v; case (k, v: Long) => k -> v } - ) - } else { - None + val value = entry.getValue + + // Detect simple metric values + Option(value.get("value")) + .filter(!_.isNull) + .map { metricValue => + val numericValue = if (metricValue.isIntegralNumber) { + metricValue.asLong() + } else if (metricValue.isFloatingPointNumber) { + metricValue.asDouble() + } else { + metricValue.asText() + } + name -> numericValue } - } - .orElse { - // Percentiles - if (value.has("values") && value.get("values").isObject) { - val percentiles = value - 
.get("values") - .properties() - .asScala - .map { pEntry => - pEntry.getKey -> pEntry.getValue.asDouble() - } - .toMap - Some(name -> percentiles) - } else { - None + .orElse { + // Stats aggregations + if (value.has("count") && value.has("sum") && value.has("avg")) { + Some( + name -> Map( + "count" -> value.get("count").asLong(), + "sum" -> Option(value.get("sum")).filterNot(_.isNull).map(_.asDouble()), + "avg" -> Option(value.get("avg")).filterNot(_.isNull).map(_.asDouble()), + "min" -> Option(value.get("min")).filterNot(_.isNull).map(_.asDouble()), + "max" -> Option(value.get("max")).filterNot(_.isNull).map(_.asDouble()) + ).collect { case (k, Some(v)) => k -> v; case (k, v: Long) => k -> v } + ) + } else { + None + } } - } - } - .toMap + .orElse { + // Percentiles + if (value.has("values") && value.get("values").isObject) { + val percentiles = value + .get("values") + .properties() + .asScala + .map { pEntry => + pEntry.getKey -> pEntry.getValue.asDouble() + } + .toMap + Some(name -> percentiles) + } else { + None + } + } + } + .toMap + bucketRoot match { + case Some(root) => metrics + ("bucket_root" -> root) + case None => metrics + } } /** Extract all top_hits aggregations with their names and hits */ @@ -533,6 +564,7 @@ trait ElasticConversion { aggregations: Map[String, ClientAggregation] ): Map[String, Any] = { if (!aggsNode.isObject) return Map.empty + var bucketRoot: Option[String] = None val allTopHits = aggsNode .properties() @@ -553,13 +585,20 @@ trait ElasticConversion { // Process each top_hits aggregation with their names val row = allTopHits.map { case (topHitName, hits) => // Determine if it is a multivalued aggregation (array_agg, ...) 
- val hasMultipleValues = aggregations.get(topHitName) match { + val agg = aggregations.get(topHitName) + val hasMultipleValues = agg match { case Some(agg) => agg.multivalued case None => // Fallback on naming convention if aggregation is not found !topHitName.toLowerCase.matches("(first|last)_.*") } + agg match { + case Some(agg) => + bucketRoot = Some(agg.bucketRoot) + case _ => + } + val processedHits = hits.map { hit => val source = extractSource(hit, fieldAliases) if (hasMultipleValues) { @@ -582,7 +621,7 @@ trait ElasticConversion { } else { val metadata = extractHitMetadata(hit) val innerHits = extractInnerHits(hit, fieldAliases) - source ++ metadata ++ innerHits + source ++ metadata ++ innerHits ++ Map("bucket_root" -> bucketRoot) } } @@ -600,7 +639,10 @@ trait ElasticConversion { } } - row + bucketRoot match { + case Some(root) => row + ("bucket_root" -> root) + case None => row + } } /** Extract global metrics from aggregations (for hits + aggs case) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index 0b44e37c..14ff313c 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -171,7 +171,8 @@ package object client extends SerializationApi { distinct: Boolean, sourceField: String, windowing: Boolean, - bucketPath: String + bucketPath: String, + bucketRoot: String ) { def multivalued: Boolean = aggType == AggregationType.ArrayAgg def singleValued: Boolean = !multivalued @@ -187,6 +188,11 @@ package object client extends SerializationApi { case _: FirstValue => AggregationType.FirstValue case _: LastValue => AggregationType.LastValue case _: ArrayAgg => AggregationType.ArrayAgg + case _: CountAgg => AggregationType.Count + case _: MinAgg => AggregationType.Min + case _: MaxAgg => AggregationType.Max + case _: AvgAgg => AggregationType.Avg + case _: SumAgg => 
AggregationType.Sum case _ => throw new IllegalArgumentException(s"Unsupported aggregation type: ${agg.aggType}") } ClientAggregation( @@ -195,7 +201,8 @@ package object client extends SerializationApi { agg.distinct, agg.sourceField, agg.aggType.isWindowing, - agg.bucketPath + agg.bucketPath, + agg.bucketRoot ) } } diff --git a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala index 45355a45..0fabf15f 100644 --- a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala +++ b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala @@ -191,6 +191,7 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver distinct = false, "name", windowing = true, + "", "" ) ) @@ -645,6 +646,7 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver distinct = false, "name", windowing = true, + "", "" ) ) @@ -1017,7 +1019,8 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver distinct = false, "salary", windowing = true, - bucketPath = "department>location" + bucketPath = "department>location", + bucketRoot = "department" ), "last_in_dept_loc" -> ClientAggregation( aggName = "last_in_dept_loc", @@ -1025,7 +1028,8 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver distinct = false, "salary", windowing = true, - bucketPath = "department>location" + bucketPath = "department>location", + bucketRoot = "department" ), "first_in_dept" -> ClientAggregation( aggName = "first_in_dept", @@ -1033,7 +1037,8 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver distinct = false, "salary", windowing = true, - bucketPath = "department" + bucketPath = "department", + bucketRoot = "department" ), "last_in_dept" -> ClientAggregation( aggName = "last_in_dept", @@ -1041,7 +1046,8 @@ class 
ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver distinct = false, "salary", windowing = true, - bucketPath = "department" + bucketPath = "department", + bucketRoot = "department" ) ) parseResponse( @@ -1059,6 +1065,451 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver throw error } } + + it should "parse window results with metrics" in { + val results = + """ + { + "took" : 60, + "timed_out" : false, + "_shards" : { + "failed" : 0.0, + "successful" : 1.0, + "total" : 1.0, + "skipped" : 0.0 + }, + "hits" : { + "total" : { + "relation" : "eq", + "value" : 20 + }, + "hits" : [ ], + "max_score" : null + }, + "aggregations" : { + "sterms#level" : { + "buckets" : [ { + "avg#avg_salary_level" : { + "value" : 95000.0 + }, + "doc_count" : 7, + "key" : "Senior" + }, { + "avg#avg_salary_level" : { + "value" : 70600.0 + }, + "doc_count" : 5, + "key" : "Junior" + }, { + "avg#avg_salary_level" : { + "value" : 79200.0 + }, + "doc_count" : 5, + "key" : "Mid" + }, { + "avg#avg_salary_level" : { + "value" : 107500.0 + }, + "doc_count" : 2, + "key" : "Lead" + }, { + "avg#avg_salary_level" : { + "value" : 130000.0 + }, + "doc_count" : 1, + "key" : "Principal" + } ], + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0 + }, + "sterms#department" : { + "buckets" : [ { + "sterms#location" : { + "buckets" : [ { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_2", + "_score" : null, + "_source" : { + "salary" : 120000 + }, + "sort" : [ 1515542400000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 3, + "key" : "New York" + }, { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_19", + "_score" : null, + "_source" : { + "salary" : 130000 + }, + "sort" : [ 1433116800000 ] + } ], + "max_score" : null + } + 
}, + "doc_count" : 3, + "key" : "San Francisco" + }, { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 1 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_16", + "_score" : null, + "_source" : { + "salary" : 105000 + }, + "sort" : [ 1460246400000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 1, + "key" : "Remote" + } ], + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0 + }, + "top_hits#first_salary_dept" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 7 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_19", + "_score" : null, + "_source" : { + "salary" : 130000 + }, + "sort" : [ 1433116800000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 7, + "key" : "Engineering" + }, { + "sterms#location" : { + "buckets" : [ { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_7", + "_score" : null, + "_source" : { + "salary" : 90000 + }, + "sort" : [ 1543536000000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 3, + "key" : "Chicago" + }, { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 2 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_9", + "_score" : null, + "_source" : { + "salary" : 95000 + }, + "sort" : [ 1488931200000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 2, + "key" : "New York" + }, { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 1 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_17", + "_score" : null, + "_source" : { + "salary" : 92000 + }, + "sort" : [ 1551312000000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 1, + "key" : "Remote" + } ], + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0 + }, + "top_hits#first_salary_dept" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 6 + }, + 
"hits" : [ { + "_index" : "emp", + "_id" : "emp_9", + "_score" : null, + "_source" : { + "salary" : 95000 + }, + "sort" : [ 1488931200000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 6, + "key" : "Sales" + }, { + "sterms#location" : { + "buckets" : [ { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 2 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_11", + "_score" : null, + "_source" : { + "salary" : 88000 + }, + "sort" : [ 1526342400000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 2, + "key" : "San Francisco" + }, { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 1 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_12", + "_score" : null, + "_source" : { + "salary" : 65000 + }, + "sort" : [ 1611100800000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 1, + "key" : "Chicago" + }, { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 1 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_18", + "_score" : null, + "_source" : { + "salary" : 81000 + }, + "sort" : [ 1599696000000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 1, + "key" : "Remote" + } ], + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0 + }, + "top_hits#first_salary_dept" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 4 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_11", + "_score" : null, + "_source" : { + "salary" : 88000 + }, + "sort" : [ 1526342400000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 4, + "key" : "Marketing" + }, { + "sterms#location" : { + "buckets" : [ { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 2 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_15", + "_score" : null, + "_source" : { + "salary" : 85000 + }, + "sort" : [ 1512086400000 ] + } ], + "max_score" : null + } + }, + 
"doc_count" : 2, + "key" : "New York" + }, { + "top_hits#first_salary_dept_loc" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 1 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_14", + "_score" : null, + "_source" : { + "salary" : 68000 + }, + "sort" : [ 1604534400000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 1, + "key" : "Chicago" + } ], + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0 + }, + "top_hits#first_salary_dept" : { + "hits" : { + "total" : { + "relation" : "eq", + "value" : 3 + }, + "hits" : [ { + "_index" : "emp", + "_id" : "emp_15", + "_score" : null, + "_source" : { + "salary" : 85000 + }, + "sort" : [ 1512086400000 ] + } ], + "max_score" : null + } + }, + "doc_count" : 3, + "key" : "HR" + } ], + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0 + } + } + } + """ + val aggregations = + Map( + "first_salary_dept_loc" -> ClientAggregation( + aggName = "first_salary_dept_loc", + aggType = AggregationType.FirstValue, + distinct = false, + "salary", + windowing = true, + bucketPath = "department>location", + bucketRoot = "department" + ), + "first_salary_dept" -> ClientAggregation( + aggName = "first_salary_dept", + aggType = AggregationType.FirstValue, + distinct = false, + "salary", + windowing = true, + bucketPath = "department", + bucketRoot = "department" + ), + "avg_salary_level" -> ClientAggregation( + aggName = "avg_salary_level", + aggType = AggregationType.Avg, + distinct = false, + "salary", + windowing = true, + bucketPath = "level", + bucketRoot = "level" + ) + ) + parseResponse( + results, + Map.empty, + aggregations + ) match { + case Success(results) => + val rows = results.map(row => extractAggregationValues(row, aggregations)) + rows.foreach(println) + val windows = rows.map(row => convertTo[EmployeeMultiWindowPartitions](row)).distinct + windows.foreach(println) + windows.size shouldBe 55 + case Failure(error) => + throw error + } + } } case class Products(category: String, 
top_products: List[Product], avg_price: Double) @@ -1101,3 +1552,9 @@ case class EmployeeDistinctPartitions( first_in_dept: Option[Int] = None, last_in_dept: Option[Int] = None ) + +case class EmployeeMultiWindowPartitions( + first_salary_dept_loc: Option[Int] = None, + first_salary_dept: Option[Int] = None, + avg_salary_level: Option[Double] = None +) diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 769e6265..ea2ae889 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -181,46 +181,73 @@ object ElasticAggregation { case AVG => aggWithFieldOrScript(avgAgg, (name, s) => avgAgg(name, sourceField).script(s)) case SUM => aggWithFieldOrScript(sumAgg, (name, s) => sumAgg(name, sourceField).script(s)) case th: WindowFunction => - val limit = { - th match { - case _: LastValue | _: FirstValue => Some(1) - case _ => th.limit.map(_.limit) - } - } - val topHits = - topHitsAgg(aggName) - .fetchSource( - th.identifier.name +: th.fields - .filterNot(_.isScriptField) - .filterNot(_.sourceField == th.identifier.name) - .map(_.sourceField) - .distinct - .toArray, - Array.empty - ) - .copy( - scripts = th.fields - .filter(_.isScriptField) - .groupBy(_.sourceField) - .map(_._2.head) - .map(f => f.sourceField -> Script(f.painless(None)).lang("painless")) - .toMap, - size = limit - ) sortBy th.orderBy.sorts.map(sort => - sort.order match { - case Some(Desc) => - th.window match { - case LAST_VALUE => FieldSort(sort.field.name).asc() - case _ => FieldSort(sort.field.name).desc() - } - case _ => - th.window match { - case LAST_VALUE => FieldSort(sort.field.name).desc() - case _ => FieldSort(sort.field.name).asc() - } + th.window match { + case COUNT => + val field = + sourceField match { + case "*" | "_id" 
| "_index" | "_type" => "_index" + case _ => sourceField + } + if (distinct) + cardinalityAgg(aggName, field) + else { + valueCountAgg(aggName, field) } - ) - topHits + case MIN => + aggWithFieldOrScript(minAgg, (name, s) => minAgg(name, sourceField).script(s)) + case MAX => + aggWithFieldOrScript(maxAgg, (name, s) => maxAgg(name, sourceField).script(s)) + case AVG => + aggWithFieldOrScript(avgAgg, (name, s) => avgAgg(name, sourceField).script(s)) + case SUM => + aggWithFieldOrScript(sumAgg, (name, s) => sumAgg(name, sourceField).script(s)) + case _ => + val limit = { + th match { + case _: LastValue | _: FirstValue => Some(1) + case _ => th.limit.map(_.limit) + } + } + val topHits = + topHitsAgg(aggName) + .fetchSource( + th.identifier.name +: th.fields + .filterNot(_.isScriptField) + .filterNot(_.sourceField == th.identifier.name) + .map(_.sourceField) + .distinct + .toArray, + Array.empty + ) + .copy( + scripts = th.fields + .filter(_.isScriptField) + .groupBy(_.sourceField) + .map(_._2.head) + .map(f => f.sourceField -> Script(f.painless(None)).lang("painless")) + .toMap, + size = limit, + sorts = th.orderBy + .map( + _.sorts.map(sort => + sort.order match { + case Some(Desc) => + th.window match { + case LAST_VALUE => FieldSort(sort.field.name).asc() + case _ => FieldSort(sort.field.name).desc() + } + case _ => + th.window match { + case LAST_VALUE => FieldSort(sort.field.name).desc() + case _ => FieldSort(sort.field.name).asc() + } + } + ) + ) + .getOrElse(Seq.empty) + ) + topHits + } case script: BucketScriptAggregation => val params = allAggregations.get(aggName) match { case Some(sqlAgg) => diff --git a/macros/src/main/scala/app/softnetwork/elastic/sql/macros/SQLQueryValidator.scala b/macros/src/main/scala/app/softnetwork/elastic/sql/macros/SQLQueryValidator.scala index 35a4cd3d..0afd31b1 100644 --- a/macros/src/main/scala/app/softnetwork/elastic/sql/macros/SQLQueryValidator.scala +++ 
b/macros/src/main/scala/app/softnetwork/elastic/sql/macros/SQLQueryValidator.scala @@ -17,7 +17,7 @@ package app.softnetwork.elastic.sql.macros import app.softnetwork.elastic.sql.`type`.{SQLType, SQLTypes} -import app.softnetwork.elastic.sql.function.aggregate.COUNT +import app.softnetwork.elastic.sql.function.aggregate.{COUNT, WindowFunction} import app.softnetwork.elastic.sql.parser.Parser import app.softnetwork.elastic.sql.query.SQLSearchRequest @@ -201,8 +201,16 @@ trait SQLQueryValidator { // Check if any field is a wildcard (*) val hasWildcard = parsedQuery.select.fields.exists { field => field.identifier.name == "*" && (field.aggregateFunction match { - case Some(COUNT) => - false + case Some(agg) => + agg match { + case COUNT => false // COUNT(*) is allowed + case th: WindowFunction => + th.window match { + case COUNT => false // COUNT(*) window function is allowed + case _ => true // Other window functions with * are not allowed + } + case _ => true // Other aggregates with * are not allowed + } case _ => true }) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index 6349c87c..418fa418 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -45,15 +45,15 @@ package object aggregate { } - case object COUNT extends Expr("COUNT") with AggregateFunction + case object COUNT extends Expr("COUNT") with AggregateFunction with Window - case object MIN extends Expr("MIN") with AggregateFunction + case object MIN extends Expr("MIN") with AggregateFunction with Window - case object MAX extends Expr("MAX") with AggregateFunction + case object MAX extends Expr("MAX") with AggregateFunction with Window - case object AVG extends Expr("AVG") with AggregateFunction + case object AVG extends Expr("AVG") with AggregateFunction 
with Window - case object SUM extends Expr("SUM") with AggregateFunction + case object SUM extends Expr("SUM") with AggregateFunction with Window sealed trait Window extends TokenRegex @@ -116,7 +116,7 @@ package object aggregate { with Updateable { def partitionBy: Seq[Identifier] def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction - def orderBy: OrderBy + def orderBy: Option[OrderBy] def window: Window def limit: Option[Limit] @@ -131,10 +131,15 @@ package object aggregate { }.toMap override def sql: String = { - val partitionByStr = - if (partitionBy.nonEmpty) s"$PARTITION_BY ${partitionBy.mkString(", ")}" - else "" - s"$window($identifier) $OVER ($partitionByStr$orderBy)" + (partitionBy, orderBy) match { + case (Nil, None) => s"$window($identifier)" + case _ => + val orderByStr = orderBy.map(_.sql).getOrElse("") + val partitionByStr = + if (partitionBy.nonEmpty) s"$PARTITION_BY ${partitionBy.mkString(", ")}" + else "" + s"$window($identifier) $OVER ($partitionByStr$orderByStr)" + } } override def toSQL(base: String): String = sql @@ -164,7 +169,7 @@ package object aggregate { case class FirstValue( identifier: Identifier, partitionBy: Seq[Identifier] = Seq.empty, - orderBy: OrderBy, + orderBy: Option[OrderBy], fields: Seq[Field] = Seq.empty ) extends WindowFunction { override def limit: Option[Limit] = Some(Limit(1, None)) @@ -177,14 +182,14 @@ package object aggregate { .asInstanceOf[FirstValue] .copy( identifier = identifier.update(request), - orderBy = orderBy.update(request) + orderBy = orderBy.map(_.update(request)) ) } case class LastValue( identifier: Identifier, partitionBy: Seq[Identifier] = Seq.empty, - orderBy: OrderBy, + orderBy: Option[OrderBy], fields: Seq[Field] = Seq.empty ) extends WindowFunction { override def limit: Option[Limit] = Some(Limit(1, None)) @@ -197,30 +202,149 @@ package object aggregate { .asInstanceOf[LastValue] .copy( identifier = identifier.update(request), - orderBy = orderBy.update(request) + orderBy = 
orderBy.map(_.update(request)) ) } case class ArrayAgg( identifier: Identifier, partitionBy: Seq[Identifier] = Seq.empty, - orderBy: OrderBy, + orderBy: Option[OrderBy], fields: Seq[Field] = Seq.empty, limit: Option[Limit] = None ) extends WindowFunction { override def window: Window = ARRAY_AGG override def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction = this.copy(partitionBy = partitionBy) - override def withFields(fields: Seq[Field]): WindowFunction = this + override def withFields(fields: Seq[Field]): WindowFunction = this.copy(fields = fields) override def update(request: SQLSearchRequest): WindowFunction = super .update(request) .asInstanceOf[ArrayAgg] .copy( identifier = identifier.update(request), - orderBy = orderBy.update(request), + orderBy = orderBy.map(_.update(request)), limit = limit.orElse(request.limit) ) override def multivalued: Boolean = true } + case class CountAgg( + identifier: Identifier, + partitionBy: Seq[Identifier] = Seq.empty, + fields: Seq[Field] = Seq.empty + ) extends WindowFunction { + override def limit: Option[Limit] = None + + override def orderBy: Option[OrderBy] = None + + override def window: Window = COUNT + + override def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction = + this.copy(partitionBy = partitionBy) + + override def withFields(fields: Seq[Field]): WindowFunction = this.copy(fields = fields) + + override def update(request: SQLSearchRequest): WindowFunction = super + .update(request) + .asInstanceOf[CountAgg] + .copy( + identifier = identifier.update(request) + ) + } + + case class MinAgg( + identifier: Identifier, + partitionBy: Seq[Identifier] = Seq.empty, + fields: Seq[Field] = Seq.empty + ) extends WindowFunction { + override def limit: Option[Limit] = None + + override def orderBy: Option[OrderBy] = None + + override def window: Window = MIN + + override def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction = + this.copy(partitionBy = partitionBy) + + override def 
withFields(fields: Seq[Field]): WindowFunction = this.copy(fields = fields) + + override def update(request: SQLSearchRequest): WindowFunction = super + .update(request) + .asInstanceOf[MinAgg] + .copy( + identifier = identifier.update(request) + ) + } + + case class MaxAgg( + identifier: Identifier, + partitionBy: Seq[Identifier] = Seq.empty, + fields: Seq[Field] = Seq.empty + ) extends WindowFunction { + override def limit: Option[Limit] = None + + override def orderBy: Option[OrderBy] = None + + override def window: Window = MAX + + override def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction = + this.copy(partitionBy = partitionBy) + + override def withFields(fields: Seq[Field]): WindowFunction = this.copy(fields = fields) + + override def update(request: SQLSearchRequest): WindowFunction = super + .update(request) + .asInstanceOf[MaxAgg] + .copy( + identifier = identifier.update(request) + ) + } + + case class AvgAgg( + identifier: Identifier, + partitionBy: Seq[Identifier] = Seq.empty, + fields: Seq[Field] = Seq.empty + ) extends WindowFunction { + override def limit: Option[Limit] = None + + override def orderBy: Option[OrderBy] = None + + override def window: Window = AVG + + override def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction = + this.copy(partitionBy = partitionBy) + + override def withFields(fields: Seq[Field]): WindowFunction = this.copy(fields = fields) + + override def update(request: SQLSearchRequest): WindowFunction = super + .update(request) + .asInstanceOf[AvgAgg] + .copy( + identifier = identifier.update(request) + ) + } + + case class SumAgg( + identifier: Identifier, + partitionBy: Seq[Identifier] = Seq.empty, + fields: Seq[Field] = Seq.empty + ) extends WindowFunction { + override def limit: Option[Limit] = None + + override def orderBy: Option[OrderBy] = None + + override def window: Window = SUM + + override def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction = + this.copy(partitionBy = 
partitionBy) + + override def withFields(fields: Seq[Field]): WindowFunction = this.copy(fields = fields) + + override def update(request: SQLSearchRequest): WindowFunction = super + .update(request) + .asInstanceOf[SumAgg] + .copy( + identifier = identifier.update(request) + ) + } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala index f32e2cd9..66038135 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala @@ -50,39 +50,80 @@ package object aggregate { } def partition_by: PackratParser[Seq[Identifier]] = - PARTITION_BY.regex ~> rep1sep(identifier, separator) + PARTITION_BY.regex ~> rep1sep(identifierWithTransformation | identifier, separator) - private[this] def over: Parser[(Seq[Identifier], OrderBy)] = - OVER.regex ~> start ~ partition_by.? ~ orderBy <~ end ^^ { case _ ~ pb ~ ob => + private[this] def over: Parser[(Seq[Identifier], Option[OrderBy])] = + OVER.regex ~> start ~ partition_by.? ~ orderBy.? <~ end ^^ { case _ ~ pb ~ ob => (pb.getOrElse(Seq.empty), ob) } - private[this] def top_hits: PackratParser[(Identifier, Seq[Identifier], OrderBy)] = - start ~ identifier ~ end ~ over.? ^^ { case _ ~ id ~ _ ~ o => + private[this] def window_function( + windowId: PackratParser[Identifier] = identifier + ): PackratParser[(Identifier, Seq[Identifier], Option[OrderBy])] = + start ~ windowId ~ end ~ over.? 
^^ { case _ ~ id ~ _ ~ o => o match { case Some((pb, ob)) => (id, pb, ob) - case None => (id, Seq.empty, OrderBy(Seq(FieldSort(id, order = None)))) + case None => (id, Seq.empty, None) } } def first_value: PackratParser[WindowFunction] = - FIRST_VALUE.regex ~ top_hits ^^ { case _ ~ top => - FirstValue(top._1, top._2, top._3) + FIRST_VALUE.regex ~ window_function() ^^ { case _ ~ top => + FirstValue( + top._1, + top._2, + top._3.orElse(Option(OrderBy(Seq(FieldSort(top._1, order = None))))) + ) } def last_value: PackratParser[WindowFunction] = - LAST_VALUE.regex ~ top_hits ^^ { case _ ~ top => - LastValue(top._1, top._2, top._3) + LAST_VALUE.regex ~ window_function() ^^ { case _ ~ top => + LastValue( + top._1, + top._2, + top._3.orElse(Option(OrderBy(Seq(FieldSort(top._1, order = None))))) + ) } def array_agg: PackratParser[WindowFunction] = - ARRAY_AGG.regex ~ top_hits ^^ { case _ ~ top => - ArrayAgg(top._1, top._2, top._3, limit = None) + ARRAY_AGG.regex ~ window_function() ^^ { case _ ~ top => + ArrayAgg( + top._1, + top._2, + top._3.orElse(Option(OrderBy(Seq(FieldSort(top._1, order = None))))), + limit = None + ) + } + + def count_agg: PackratParser[WindowFunction] = + count ~ window_function() ^^ { case _ ~ top => + CountAgg(top._1, top._2) + } + + def min_agg: PackratParser[WindowFunction] = + min ~ window_function() ^^ { case _ ~ top => + MinAgg(top._1, top._2) + } + + def max_agg: PackratParser[WindowFunction] = + max ~ window_function() ^^ { case _ ~ top => + MaxAgg(top._1, top._2) + } + + def avg_agg: PackratParser[WindowFunction] = + avg ~ window_function() ^^ { case _ ~ top => + AvgAgg(top._1, top._2) + } + + def sum_agg: PackratParser[WindowFunction] = + sum ~ window_function() ^^ { case _ ~ top => + SumAgg(top._1, top._2) } def identifierWithWindowFunction: PackratParser[Identifier] = - (first_value | last_value | array_agg) ^^ { th => - th.identifier.withFunctions(th +: th.identifier.functions) + (first_value | last_value | array_agg | count_agg | 
min_agg | max_agg | avg_agg | sum_agg) ^^ { + th => + th.identifier.withFunctions(th +: th.identifier.functions) } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala index cb5f81b8..df04b664 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala @@ -208,6 +208,13 @@ case class BucketNode( case None => identifier } } + + def root: BucketNode = { + parent match { + case Some(p) => p.root + case None => this + } + } } case class BucketTree( diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index 8dcbebe8..fa3cbd26 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -139,7 +139,6 @@ case class SQLAggregation( aggType: AggregateFunction, direction: Option[SortOrder] = None, nestedElement: Option[NestedElement] = None, - buckets: Seq[String] = Seq.empty, bucketPath: String = "" ) { val nested: Boolean = nestedElement.nonEmpty @@ -149,6 +148,11 @@ case class SQLAggregation( case COUNT => true case _ => false }) + val bucketRoot: String = + bucketPath.split(">").toSeq match { + case Nil => "" + case seq => seq.head + } } object SQLAggregation { @@ -236,7 +240,6 @@ object SQLAggregation { aggType = aggType, direction = direction, nestedElement = identifier.nestedElement, - buckets = request.buckets.map { _.name }, bucketPath = bucketPath ) ) diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala index 243c19e8..1c648d7a 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala +++ 
b/testkit/src/main/scala/app/softnetwork/elastic/client/WindowFunctionSpec.scala @@ -1499,6 +1499,71 @@ trait WindowFunctionSpec } } + it should "handle 3 different window partitions simultaneously" in { + val results = client.searchAs[EmployeeMultiWindowPartitions](""" + SELECT + department, + location, + level, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS first_salary_dept_loc, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary_dept, + AVG(salary) OVER ( + PARTITION BY level + ) AS avg_salary_level + FROM emp + ORDER BY department, location, hire_date + LIMIT 20 + """) + + results match { + case ElasticSuccess(employees) => + employees should have size 20 + + log.info(s"\n=== Testing 3 distinct window partitions ===") + + // Window 1: PARTITION BY department, location + val byDeptLoc = employees.groupBy(e => (e.department, e.location)) + log.info(s"\nWindow 1: ${byDeptLoc.size} partitions (department, location)") + byDeptLoc.foreach { case ((dept, loc), emps) => + val firstValues = emps.flatMap(_.first_salary_dept_loc).distinct + firstValues should have size 1 + log.info(s" ✓ ($dept, $loc): FIRST=${firstValues.head}") + } + + // Window 2: PARTITION BY department + val byDept = employees.groupBy(_.department) + log.info(s"\nWindow 2: ${byDept.size} partitions (department)") + byDept.foreach { case (dept, emps) => + val firstValues = emps.flatMap(_.first_salary_dept).distinct + firstValues should have size 1 + log.info(s" ✓ $dept: FIRST=${firstValues.head}") + } + + // Window 3: PARTITION BY level + val byLevel = employees.groupBy(_.level) + log.info(s"\nWindow 3: ${byLevel.size} partitions (level)") + byLevel.foreach { case (level, emps) => + val avgValues = emps.flatMap(_.avg_salary_level).distinct + avgValues should have size 1 + val expectedAvg = emps.map(_.salary).sum.toDouble / emps.size + avgValues.head shouldBe expectedAvg +- 
0.01 + log.info(s" ✓ $level: AVG=${avgValues.head} (${emps.size} employees)") + } + + case ElasticFailure(error) => + fail(s"Query failed: ${error.message}") + } + } + "Scroll API with distinct window partitions" should "compute correctly with streaming and different partitions" in { val config = ScrollConfig(scrollSize = 5, logEvery = 5) val startTime = System.currentTimeMillis() @@ -1547,6 +1612,79 @@ trait WindowFunctionSpec } } + it should "handle 3 distinct partitions with small scroll size" in { + val config = ScrollConfig(scrollSize = 3, logEvery = 3) + val startTime = System.currentTimeMillis() + + var batchCount = 0 + val futureResults = client + .scrollAs[EmployeeMultiWindowPartitions]( + """ + SELECT + department, + location, + level, + name, + salary, + hire_date, + FIRST_VALUE(salary) OVER ( + PARTITION BY department, location + ORDER BY hire_date ASC + ) AS first_salary_dept_loc, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary_dept, + AVG(salary) OVER ( + PARTITION BY level + ) AS avg_salary_level + FROM emp + LIMIT 20 + """, + config + ) + .map { batch => + batchCount += 1 + log.info(s" Batch $batchCount: ${batch._2.totalDocuments} documents") + batch + } + .runWith(Sink.seq) + + val results = Await.result(futureResults, 30.seconds).map(_._1) + val duration = System.currentTimeMillis() - startTime + + results should have size 20 + log.info(s"\n✓ Scrolled ${results.size} documents in $batchCount batches (${duration}ms)") + + // Check all 3 partitions + val byDeptLoc = results.groupBy(e => (e.department, e.location)) + val byDept = results.groupBy(_.department) + val byLevel = results.groupBy(_.level) + + log.info(s"\nPartition counts:") + log.info(s" (department, location): ${byDeptLoc.size} partitions") + log.info(s" (department): ${byDept.size} partitions") + log.info(s" (level): ${byLevel.size} partitions") + + // Check each partition + byDeptLoc.foreach { case ((dept, loc), emps) => + val 
firstValues = emps.flatMap(_.first_salary_dept_loc).distinct + firstValues should have size 1 + } + + byDept.foreach { case (dept, emps) => + val firstValues = emps.flatMap(_.first_salary_dept).distinct + firstValues should have size 1 + } + + byLevel.foreach { case (level, emps) => + val avgValues = emps.flatMap(_.avg_salary_level).distinct + avgValues should have size 1 + } + + log.info("✓ All 3 partitions computed correctly") + } + "Search and Scroll APIs with distinct window partitions" should "maintain consistency between them" in { // Search val searchResults = client.searchAs[EmployeeDistinctPartitions](""" From 2b85c8383cac3570afcad9de2fc8ad03cd648ef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Sun, 30 Nov 2025 11:07:21 +0100 Subject: [PATCH 32/40] to fix sql query specifications --- .../elastic/sql/SQLQuerySpec.scala | 20 +++++++++---------- .../elastic/sql/SQLQuerySpec.scala | 20 +++++++++---------- .../sql/function/aggregate/package.scala | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index dd0f26cf..0b1b368d 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -1205,11 +1205,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { - | "ct": { - | "value_count": { - | "field": "identifier2" - | } - | }, | "lastSeen": { | "max": { | "field": "createdAt", @@ -1218,6 +1213,11 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? 
null : LocalDate.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"))" | } | } + | }, + | "ct": { + | "value_count": { + | "field": "identifier2" + | } | } | } | } @@ -1373,11 +1373,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { - | "ct": { - | "value_count": { - | "field": "identifier2" - | } - | }, | "lastSeen": { | "max": { | "field": "createdAt", @@ -1386,6 +1381,11 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" | } | } + | }, + | "ct": { + | "value_count": { + | "field": "identifier2" + | } | } | } | } diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index e741bc9e..535c0c1e 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -1205,11 +1205,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { - | "ct": { - | "value_count": { - | "field": "identifier2" - | } - | }, | "lastSeen": { | "max": { | "field": "createdAt", @@ -1218,6 +1213,11 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? 
null : LocalDate.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"))" | } | } + | }, + | "ct": { + | "value_count": { + | "field": "identifier2" + | } | } | } | } @@ -1373,11 +1373,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { - | "ct": { - | "value_count": { - | "field": "identifier2" - | } - | }, | "lastSeen": { | "max": { | "field": "createdAt", @@ -1386,6 +1381,11 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" | } | } + | }, + | "ct": { + | "value_count": { + | "field": "identifier2" + | } | } | } | } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index 418fa418..1e853ae5 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -216,7 +216,7 @@ package object aggregate { override def window: Window = ARRAY_AGG override def withPartitionBy(partitionBy: Seq[Identifier]): WindowFunction = this.copy(partitionBy = partitionBy) - override def withFields(fields: Seq[Field]): WindowFunction = this.copy(fields = fields) + override def withFields(fields: Seq[Field]): WindowFunction = this override def update(request: SQLSearchRequest): WindowFunction = super .update(request) .asInstanceOf[ArrayAgg] From 5b277255ba71bc8cae2cd5f080e49ceade7a3df7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 1 Dec 2025 08:39:36 +0100 Subject: [PATCH 33/40] to fix window function parsing + mapping within bucket scripts --- .../sql/bridge/ElasticAggregation.scala | 10 +- .../elastic/sql/SQLQuerySpec.scala 
| 164 +++++++++--------- .../sql/bridge/ElasticAggregation.scala | 10 +- .../elastic/sql/SQLQuerySpec.scala | 20 +-- .../elastic/sql/parser/GroupByParser.scala | 1 + .../elastic/sql/parser/OrderByParser.scala | 1 + .../elastic/sql/parser/Parser.scala | 7 +- .../elastic/sql/parser/SelectParser.scala | 4 +- .../elastic/sql/parser/WhereParser.scala | 1 + .../parser/function/aggregate/package.scala | 10 +- 10 files changed, 118 insertions(+), 110 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 571a7c89..264e0ded 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -659,17 +659,17 @@ object ElasticAggregation { val currentBucketPath = bucketScriptAggregation.identifier.nestedElement.map(_.nestedPath).getOrElse("") // Extract ALL metrics paths - val allMetricsPaths = bucketScriptAggregation.params.keys + val allMetricsPaths = bucketScriptAggregation.params val result = - allMetricsPaths.flatMap { metricName => - allAggregations.find(agg => agg.aggName == metricName || agg.field == metricName) match { + allMetricsPaths.flatMap { metric => + allAggregations.find(agg => agg.aggName == metric._2 || agg.field == metric._2) match { case Some(sqlAgg) => val metricBucketPath = sqlAgg.nestedElement .map(_.nestedPath) .getOrElse("") if (metricBucketPath == currentBucketPath) { // Metric of the same level - Some(metricName -> metricName) + Some(metric._1 -> metric._2) } else if (isDirectChild(metricBucketPath, currentBucketPath)) { // Metric of a direct child // CHECK if it is a "global" metric (cardinality, etc.) or a bucket metric (avg, sum, etc.) 
@@ -683,7 +683,7 @@ object ElasticAggregation { // println( // s"[DEBUG extractMetricsPath] Direct child (global metric): $metricName -> $childNestedName>$metricName" // ) - Some(metricName -> s"$childNestedName>$metricName") + Some(metric._1 -> s"$childNestedName>${metric._2}") } else { // Bucket metric: cannot be referenced from the parent // println( diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 0b1b368d..a14d466f 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -1182,47 +1182,47 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { println(query) query shouldBe """{ - | "query": { - | "bool": { - | "filter": [ - | { - | "exists": { - | "field": "identifier2" - | } - | } - | ] - | } - | }, - | "size": 0, - | "_source": false, - | "aggs": { - | "identifier": { - | "terms": { - | "field": "identifier", - | "min_doc_count": 1, - | "order": { - | "ct": "desc" - | } - | }, - | "aggs": { - | "lastSeen": { - | "max": { - | "field": "createdAt", - | "script": { - | "lang": "painless", - | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? 
null : LocalDate.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"))" - | } - | } - | }, - | "ct": { - | "value_count": { - | "field": "identifier2" - | } - | } - | } - | } - | } - |}""".stripMargin + | "query": { + | "bool": { + | "filter": [ + | { + | "exists": { + | "field": "identifier2" + | } + | } + | ] + | } + | }, + | "size": 0, + | "_source": false, + | "aggs": { + | "identifier": { + | "terms": { + | "field": "identifier", + | "min_doc_count": 1, + | "order": { + | "ct": "desc" + | } + | }, + | "aggs": { + | "ct": { + | "value_count": { + | "field": "identifier2" + | } + | }, + | "lastSeen": { + | "max": { + | "field": "createdAt", + | "script": { + | "lang": "painless", + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? null : LocalDate.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"))" + | } + | } + | } + | } + | } + | } + |}""".stripMargin .replaceAll("\\s", "") .replaceAll("defp", "def p") .replaceAll("defe", "def e") @@ -1350,47 +1350,47 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { println(query) query shouldBe """{ - | "query": { - | "bool": { - | "filter": [ - | { - | "exists": { - | "field": "identifier2" - | } - | } - | ] - | } - | }, - | "size": 0, - | "_source": false, - | "aggs": { - | "identifier": { - | "terms": { - | "field": "identifier", - | "min_doc_count": 1, - | "order": { - | "ct": "desc" - | } - | }, - | "aggs": { - | "lastSeen": { - | "max": { - | "field": "createdAt", - | "script": { - | "lang": "painless", - | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? 
null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" - | } - | } - | }, - | "ct": { - | "value_count": { - | "field": "identifier2" - | } - | } - | } - | } - | } - |}""".stripMargin + | "query": { + | "bool": { + | "filter": [ + | { + | "exists": { + | "field": "identifier2" + | } + | } + | ] + | } + | }, + | "size": 0, + | "_source": false, + | "aggs": { + | "identifier": { + | "terms": { + | "field": "identifier", + | "min_doc_count": 1, + | "order": { + | "ct": "desc" + | } + | }, + | "aggs": { + | "ct": { + | "value_count": { + | "field": "identifier2" + | } + | }, + | "lastSeen": { + | "max": { + | "field": "createdAt", + | "script": { + | "lang": "painless", + | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" + | } + | } + | } + | } + | } + | } + |}""".stripMargin .replaceAll("\\s", "") .replaceAll("defp", "def p") .replaceAll("defe", "def e") diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index ea2ae889..637706cd 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -659,17 +659,17 @@ object ElasticAggregation { val currentBucketPath = bucketScriptAggregation.identifier.nestedElement.map(_.nestedPath).getOrElse("") // Extract ALL metrics paths - val allMetricsPaths = bucketScriptAggregation.params.keys + val allMetricsPaths = bucketScriptAggregation.params val result = - allMetricsPaths.flatMap { metricName => - allAggregations.find(agg => agg.aggName == metricName || agg.field 
== metricName) match { + allMetricsPaths.flatMap { metric => + allAggregations.find(agg => agg.aggName == metric._2 || agg.field == metric._2) match { case Some(sqlAgg) => val metricBucketPath = sqlAgg.nestedElement .map(_.nestedPath) .getOrElse("") if (metricBucketPath == currentBucketPath) { // Metric of the same level - Some(metricName -> metricName) + Some(metric._1 -> metric._2) } else if (isDirectChild(metricBucketPath, currentBucketPath)) { // Metric of a direct child // CHECK if it is a "global" metric (cardinality, etc.) or a bucket metric (avg, sum, etc.) @@ -683,7 +683,7 @@ object ElasticAggregation { // println( // s"[DEBUG extractMetricsPath] Direct child (global metric): $metricName -> $childNestedName>$metricName" // ) - Some(metricName -> s"$childNestedName>$metricName") + Some(metric._1 -> s"$childNestedName>${metric._2}") } else { // Bucket metric: cannot be referenced from the parent // println( diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 535c0c1e..e741bc9e 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -1205,6 +1205,11 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { + | "ct": { + | "value_count": { + | "field": "identifier2" + | } + | }, | "lastSeen": { | "max": { | "field": "createdAt", @@ -1213,11 +1218,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? 
null : LocalDate.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd\"))" | } | } - | }, - | "ct": { - | "value_count": { - | "field": "identifier2" - | } | } | } | } @@ -1373,6 +1373,11 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { + | "ct": { + | "value_count": { + | "field": "identifier2" + | } + | }, | "lastSeen": { | "max": { | "field": "createdAt", @@ -1381,11 +1386,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" | } | } - | }, - | "ct": { - | "value_count": { - | "field": "identifier2" - | } | } | } | } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala index 3feb6669..dcc68e6d 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala @@ -25,6 +25,7 @@ trait GroupByParser { def bucketWithFunction: PackratParser[Identifier] = identifierWithArithmeticExpression | identifierWithTransformation | + identifierWithWindowFunction | identifierWithAggregation | identifierWithIntervalFunction | identifierWithFunction | diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala index 74ea9c9d..6be69619 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala @@ -33,6 +33,7 @@ trait OrderByParser { def fieldWithFunction: PackratParser[Identifier] = identifierWithArithmeticExpression | identifierWithTransformation | + 
identifierWithWindowFunction | identifierWithAggregation | identifierWithIntervalFunction | identifierWithFunction | diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala index 7f8f6ac2..245eb2a5 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala @@ -238,7 +238,12 @@ trait Parser "length", "lower", "upper", - "trim" + "trim", + "first", + "last", + "array_agg", + "first_value", + "last_value" // "ltrim", // "rtrim", // "replace", diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala index 92ead92d..89b6aff9 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala @@ -22,9 +22,9 @@ trait SelectParser { self: Parser with WhereParser => def field: PackratParser[Field] = - (identifierWithWindowFunction | - identifierWithArithmeticExpression | + (identifierWithArithmeticExpression | identifierWithTransformation | + identifierWithWindowFunction | identifierWithAggregation | identifierWithIntervalFunction | identifierWithFunction | diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala index b5d880e3..d33cced4 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala @@ -95,6 +95,7 @@ trait WhereParser { private def any_identifier: PackratParser[Identifier] = identifierWithArithmeticExpression | identifierWithTransformation | + identifierWithWindowFunction | identifierWithAggregation | identifierWithIntervalFunction | identifierWithFunction | diff --git 
a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala index 66038135..75de263c 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala @@ -96,27 +96,27 @@ package object aggregate { } def count_agg: PackratParser[WindowFunction] = - count ~ window_function() ^^ { case _ ~ top => + count ~ window_function(aggWithFunction) ^^ { case _ ~ top => CountAgg(top._1, top._2) } def min_agg: PackratParser[WindowFunction] = - min ~ window_function() ^^ { case _ ~ top => + min ~ window_function(aggWithFunction) ^^ { case _ ~ top => MinAgg(top._1, top._2) } def max_agg: PackratParser[WindowFunction] = - max ~ window_function() ^^ { case _ ~ top => + max ~ window_function(aggWithFunction) ^^ { case _ ~ top => MaxAgg(top._1, top._2) } def avg_agg: PackratParser[WindowFunction] = - avg ~ window_function() ^^ { case _ ~ top => + avg ~ window_function(aggWithFunction) ^^ { case _ ~ top => AvgAgg(top._1, top._2) } def sum_agg: PackratParser[WindowFunction] = - sum ~ window_function() ^^ { case _ ~ top => + sum ~ window_function(aggWithFunction) ^^ { case _ ~ top => SumAgg(top._1, top._2) } From 3123ebc5e23316eeeee915593f00f0f445914e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 1 Dec 2025 09:22:30 +0100 Subject: [PATCH 34/40] to fix aggregate functions that should be scripted --- .../elastic/sql/bridge/ElasticAggregation.scala | 9 ++++++++- .../elastic/sql/bridge/ElasticAggregation.scala | 9 ++++++++- .../app/softnetwork/elastic/sql/function/package.scala | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 
264e0ded..6f1a8fa0 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -159,7 +159,14 @@ object ElasticAggregation { val script = Script(s"$context$scriptSrc").lang("painless") buildScript(aggName, script) } else { - buildField(aggName, sourceField) + aggType match { + case th: WindowFunction if th.shouldBeScripted => + val context = PainlessContext() + val scriptSrc = th.identifier.painless(Some(context)) + val script = Script(s"$context$scriptSrc").lang("painless") + buildScript(aggName, script) + case _ => buildField(aggName, sourceField) + } } } diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 637706cd..040cf0c8 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -159,7 +159,14 @@ object ElasticAggregation { val script = Script(s"$context$scriptSrc").lang("painless") buildScript(aggName, script) } else { - buildField(aggName, sourceField) + aggType match { + case th: WindowFunction if th.shouldBeScripted => + val context = PainlessContext() + val scriptSrc = th.identifier.painless(Some(context)) + val script = Script(s"$context$scriptSrc").lang("painless") + buildScript(aggName, script) + case _ => buildField(aggName, sourceField) + } } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala index eb494083..3667718b 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala @@ -37,6 +37,8 @@ package object function { trait FunctionWithIdentifier 
extends Function { def identifier: Identifier + + override def shouldBeScripted: Boolean = identifier.shouldBeScripted } trait FunctionWithValue[+T] extends Function with TokenValue { From d18742b3d17e0be9e4e38b9a4813ca9840a6d984 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 1 Dec 2025 10:12:02 +0100 Subject: [PATCH 35/40] to fix parser bug --- .../scala/app/softnetwork/elastic/sql/parser/Parser.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala index 245eb2a5..443314d2 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala @@ -106,13 +106,15 @@ trait Parser def separator: PackratParser[Delimiter] = "," ^^ (_ => Separator) - def valueExpr: PackratParser[PainlessScript] = + def valueExpr: PackratParser[PainlessScript] = { // the order is important here + identifierWithWindowFunction | identifierWithTransformation | // transformations applied to an identifier identifierWithIntervalFunction | identifierWithFunction | // fonctions applied to an identifier identifierWithValue | identifier + } implicit def functionAsIdentifier(mf: Function): Identifier = mf match { case id: Identifier => id From aac63c4919465a3cc7bd7248f6f8c092b9754df9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 1 Dec 2025 10:26:25 +0100 Subject: [PATCH 36/40] to fix script with date parse function --- .../app/softnetwork/elastic/sql/SQLQuerySpec.scala | 10 +++++----- .../app/softnetwork/elastic/sql/SQLQuerySpec.scala | 10 +++++----- .../elastic/sql/function/time/package.scala | 6 ++++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala 
b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index a14d466f..245bd887 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -1373,11 +1373,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { - | "ct": { - | "value_count": { - | "field": "identifier2" - | } - | }, | "lastSeen": { | "max": { | "field": "createdAt", @@ -1386,6 +1381,11 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" | } | } + | }, + | "ct": { + | "value_count": { + | "field": "identifier2" + | } | } | } | } diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index e741bc9e..be4c7494 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -1373,11 +1373,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { - | "ct": { - | "value_count": { - | "field": "identifier2" - | } - | }, | "lastSeen": { | "max": { | "field": "createdAt", @@ -1386,6 +1381,11 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['createdAt'].size() == 0 ? null : doc['createdAt'].value); (param1 == null) ? 
null : ZonedDateTime.parse(param1, DateTimeFormatter.ofPattern(\"yyyy-MM-dd HH:mm:ss.SSS XXX\")).truncatedTo(ChronoUnit.MINUTES).get(ChronoField.YEAR)" | } | } + | }, + | "ct": { + | "value_count": { + | "field": "identifier2" + | } | } | } | } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala index 82fee992..55a9bd44 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala @@ -499,10 +499,10 @@ package object time { case class DateParse(identifier: Identifier, format: String) extends DateFunction + with DateMathScript with TransformFunction[SQLVarchar, SQLDate] with FunctionWithIdentifier - with FunctionWithDateTimeFormat - with DateMathScript { + with FunctionWithDateTimeFormat { override def fun: Option[PainlessScript] = None override def args: List[PainlessScript] = List(identifier) @@ -548,6 +548,8 @@ package object time { } override def formatScript: Option[String] = Some(format) + + override def shouldBeScripted: Boolean = true // FIXME } case object DateFormat extends Expr("DATE_FORMAT") with TokenRegex with PainlessScript { From 336af080becb3d6040e9d3da9f1dd40741fe4ecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Wed, 3 Dec 2025 09:43:52 +0100 Subject: [PATCH 37/40] to fix aggregations with multiple partition by, update README.md + keywords and documentation related to aggregate functions, add query normalization before parsing the latter --- README.md | 585 +++++++++++++++++- .../sql/bridge/ElasticAggregation.scala | 365 +++++------ .../elastic/client/ScrollApi.scala | 2 +- .../elastic/client/SearchApi.scala | 2 +- documentation/sql/functions_aggregate.md | 29 +- documentation/sql/keywords.md | 35 +- .../sql/bridge/ElasticAggregation.scala | 365 +++++------ .../elastic/sql/parser/Parser.scala | 63 +- 
.../parser/function/aggregate/package.scala | 14 +- .../elastic/sql/query/GroupBy.scala | 11 + 10 files changed, 1029 insertions(+), 442 deletions(-) diff --git a/README.md b/README.md index ffeed6cd..f75885c5 100644 --- a/README.md +++ b/README.md @@ -260,18 +260,20 @@ val results = client.search(SQLQuery(sqlQuery)) } }, "size": 0, - "_source": true, + "_source": false, "aggs": { "restaurant_name": { "terms": { "field": "restaurant_name", - "size": 1000 + "size": 1000, + "min_doc_count": 1 }, "aggs": { "restaurant_city": { "terms": { "field": "restaurant_city", - "size": 1000 + "size": 1000, + "min_doc_count": 1 }, "aggs": { "menu": { @@ -297,7 +299,8 @@ val results = client.search(SQLQuery(sqlQuery)) "menu_category": { "terms": { "field": "menus.category", - "size": 1000 + "size": 1000, + "min_doc_count": 1 }, "aggs": { "dish": { @@ -308,14 +311,10 @@ val results = client.search(SQLQuery(sqlQuery)) "dish_name": { "terms": { "field": "menus.dishes.name", - "size": 1000 + "size": 1000, + "min_doc_count": 1 }, "aggs": { - "avg_dish_price": { - "avg": { - "field": "menus.dishes.price" - } - }, "having_filter": { "bucket_selector": { "buckets_path": { @@ -340,7 +339,8 @@ val results = client.search(SQLQuery(sqlQuery)) "ingredient_name": { "terms": { "field": "menus.dishes.ingredients.name", - "size": 1000 + "size": 1000, + "min_doc_count": 1 }, "aggs": { "avg_ingredient_cost": { @@ -388,7 +388,568 @@ val results = client.search(SQLQuery(sqlQuery)) ``` --- -### **3.2. Compile-Time SQL Query Validation** +### **3.2 Window Functions Support** + +SoftClient4ES supports **SQL window functions** that are automatically translated into Elasticsearch aggregations. Window functions allow you to perform calculations across sets of rows that are related to the current row, without collapsing the result set. 
+ +#### **Supported Window Functions** + +| Window Function | SQL Syntax | Description | Use Case | +|--------------------|-------------------------------------------------------------------|-------------------------------------------------|--------------------------------------| +| **FIRST_VALUE** | `FIRST_VALUE(field) OVER (PARTITION BY ... ORDER BY ...)` | Returns the first value in an ordered partition | Get first sale amount per product | +| **LAST_VALUE** | `LAST_VALUE(field) OVER (PARTITION BY ... ORDER BY ...)` | Returns the last value in an ordered partition | Get most recent price per product | +| **ARRAY_AGG** | `ARRAY_AGG(field) OVER (PARTITION BY ... ORDER BY ... LIMIT ...)` | Aggregates values into an array | Collect all sale amounts per product | +| **COUNT** | `COUNT(field) OVER (PARTITION BY ...)` | Counts values in each partition | Count sales per product | +| **SUM** | `SUM(field) OVER (PARTITION BY ...)` | Sums values in each partition | Calculate total sales per product | +| **AVG** | `AVG(field) OVER (PARTITION BY ...)` | Averages values in each partition | Calculate average price per category | +| **MIN** | `MIN(field) OVER (PARTITION BY ...)` | Finds minimum value in each partition | Find lowest price per product | +| **MAX** | `MAX(field) OVER (PARTITION BY ...)` | Finds maximum value in each partition | Find highest sale per product | +| **COUNT DISTINCT** | `COUNT(DISTINCT field) OVER (PARTITION BY ...)` | Counts unique values in each partition | Count unique customers per product | + +#### **Key Features** + +✅ **PARTITION BY**: Group rows into partitions for separate calculations +✅ **ORDER BY**: Define ordering within partitions (required for `FIRST_VALUE`, `LAST_VALUE`, `ARRAY_AGG`) +✅ **Multiple Partitions**: Support for different partition schemes in the same query +✅ **Mixed Aggregations**: Combine window functions with standard `GROUP BY` aggregations +✅ **In-Memory Join**: Window function results are joined with main query 
results via partition keys +✅ **Type Safety**: Full compile-time validation for window function queries + +#### **How Window Functions Work** + +When a SQL query mixes window functions with non-aggregated fields, SoftClient4ES executes **two separate queries**: + +1. **Window Functions Query**: Computes window function results using aggregations +2. **Main Query**: Retrieves non-aggregated fields using standard Elasticsearch search + +The results are then **joined in memory** using the partition keys (`PARTITION BY` fields), ensuring that each row from the main query is enriched with its corresponding window function values. + +**Query Execution Flow:** + +``` +SQL Query with Window Functions + ↓ + ┌────────────────────────────────────┐ + │ Query Analysis & Decomposition │ + └────────────────────────────────────┘ + ↓ ↓ + ┌─────────────┐ ┌──────────────────┐ + │ Main Query │ │ Window Functions │ + │ (Fields) │ │ Query (Aggs) │ + └─────────────┘ └──────────────────┘ + ↓ ↓ + ┌─────────────┐ ┌──────────────────┐ + │ ES Search │ │ ES Aggregations │ + └─────────────┘ └──────────────────┘ + ↓ ↓ + ┌─────────────────────────────────────┐ + │ In-Memory Join (Partition Keys) │ + └─────────────────────────────────────┘ + ↓ + ┌─────────────────────────────────────┐ + │ Enriched Result Set │ + └─────────────────────────────────────┘ +``` + +#### **Example 1: Product Sales Analysis with Window Functions** + +```scala +case class ProductSalesAnalysis( + productId: String, + productName: String, + saleMonth: String, + monthlySales: Double, + firstSaleAmount: Double, // FIRST_VALUE + lastSaleAmount: Double, // LAST_VALUE + allSaleAmounts: List[Double], // ARRAY_AGG + totalSales: Double, // SUM OVER + avgSaleAmount: Double, // AVG OVER + minSaleAmount: Double, // MIN OVER + maxSaleAmount: Double, // MAX OVER + saleCount: Long, // COUNT OVER + uniqueCustomers: Long // COUNT DISTINCT OVER +) + +val sqlQuery = """ + SELECT + product_id AS productId, + product_name AS productName, + 
DATE_TRUNC('month', sale_date) AS saleMonth,
+    SUM(amount) AS monthlySales,
+
+    -- Window functions with different partitions
+    FIRST_VALUE(amount) OVER (
+      PARTITION BY product_id, DATE_TRUNC('month', sale_date)
+      ORDER BY sale_date ASC
+    ) AS firstSaleAmount,
+
+    LAST_VALUE(amount) OVER (
+      PARTITION BY product_id, DATE_TRUNC('month', sale_date)
+      ORDER BY sale_date ASC
+    ) AS lastSaleAmount,
+
+    ARRAY_AGG(amount) OVER (
+      PARTITION BY product_id
+      ORDER BY sale_date ASC
+      LIMIT 100
+    ) AS allSaleAmounts,
+
+    SUM(amount) OVER (PARTITION BY product_id) AS totalSales,
+    AVG(amount) OVER (PARTITION BY product_id) AS avgSaleAmount,
+    MIN(amount) OVER (PARTITION BY product_id) AS minSaleAmount,
+    MAX(amount) OVER (PARTITION BY product_id) AS maxSaleAmount,
+    COUNT(amount) OVER (PARTITION BY product_id) AS saleCount,
+    COUNT(DISTINCT customer_id) OVER (PARTITION BY product_id) AS uniqueCustomers
+
+  FROM sales
+  WHERE sale_date >= '2024-01-01'
+  GROUP BY product_id, product_name, DATE_TRUNC('month', sale_date)
+  ORDER BY product_id, saleMonth
+"""
+
+// Type-safe execution with compile-time validation
+val results: Source[ProductSalesAnalysis, NotUsed] =
+  client.scrollAs[ProductSalesAnalysis](sqlQuery)
+
+results.runWith(Sink.foreach { analysis =>
+  println(s"""
+    Product: ${analysis.productName} (${analysis.productId})
+    Month: ${analysis.saleMonth}
+    Monthly Sales: $${analysis.monthlySales}
+    First Sale: $${analysis.firstSaleAmount}
+    Last Sale: $${analysis.lastSaleAmount}
+    All Sales: ${analysis.allSaleAmounts.mkString("[", ", ", "]")}
+    Total Sales (All Time): $${analysis.totalSales}
+    Average Sale: $${analysis.avgSaleAmount}
+    Price Range: $${analysis.minSaleAmount} - $${analysis.maxSaleAmount}
+    Sale Count: ${analysis.saleCount}
+    Unique Customers: ${analysis.uniqueCustomers}
+  """)
+})
+```
+
+#### **Example 1: Translation to Elasticsearch DSL**
+
+Since we use `GROUP BY` there are no non-aggregated fields and the SQL query above is decomposed into only one
Elasticsearch query: + +**Query: Window Functions Aggregations** + +```json +{ + "query": { + "bool": { + "filter": [ + { + "range": { + "sale_date": { + "gte": "2024-01-01" + } + } + } + ] + } + }, + "size": 0, + "_source": false, + "aggs": { + "productId": { + "terms": { + "field": "product_id", + "min_doc_count": 1, + "order": { + "_key": "asc" + } + }, + "aggs": { + "allSaleAmounts": { + "top_hits": { + "size": 100, + "sort": [ + { + "sale_date": { + "order": "asc" + } + } + ], + "_source": { + "includes": [ + "amount" + ] + } + } + }, + "totalSales": { + "sum": { + "field": "amount" + } + }, + "avgSaleAmount": { + "avg": { + "field": "amount" + } + }, + "minSaleAmount": { + "min": { + "field": "amount" + } + }, + "maxSaleAmount": { + "max": { + "field": "amount" + } + }, + "saleCount": { + "value_count": { + "field": "amount" + } + }, + "uniqueCustomers": { + "cardinality": { + "field": "customer_id" + } + }, + "saleMonth": { + "date_histogram": { + "interval": "1M", + "min_doc_count": 1, + "field": "sale_date" + }, + "aggs": { + "firstSaleAmount": { + "top_hits": { + "size": 1, + "sort": [ + { + "sale_date": { + "order": "asc" + } + } + ], + "_source": { + "includes": [ + "amount" + ] + } + } + }, + "lastSaleAmount": { + "top_hits": { + "size": 1, + "sort": [ + { + "sale_date": { + "order": "desc" + } + } + ], + "_source": { + "includes": [ + "amount" + ] + } + } + } + } + }, + "productName": { + "terms": { + "field": "product_name", + "min_doc_count": 1 + }, + "aggs": { + "saleMonth": { + "date_histogram": { + "interval": "1M", + "min_doc_count": 1, + "field": "sale_date" + }, + "aggs": { + "monthlySales": { + "sum": { + "field": "amount" + } + } + } + } + } + } + } + } + } +} +``` + +#### **Example 2: Customer Purchase Patterns** + +```scala +case class CustomerPurchasePattern( + customerId: String, + customerName: String, + purchaseDate: String, + amount: Double, + firstPurchaseAmount: Double, + lastPurchaseAmount: Double, + allPurchaseAmounts: 
List[Double], + totalSpent: Double, + avgPurchaseAmount: Double, + purchaseCount: Long +) + +val sqlQuery = """ + SELECT + customer_id AS customerId, + customer_name AS customerName, + purchase_date AS purchaseDate, + amount, + + FIRST_VALUE(amount) OVER ( + PARTITION BY customer_id + ORDER BY purchase_date ASC + ) AS firstPurchaseAmount, + + LAST_VALUE(amount) OVER ( + PARTITION BY customer_id + ORDER BY purchase_date ASC + ) AS lastPurchaseAmount, + + ARRAY_AGG(amount) OVER ( + PARTITION BY customer_id + ORDER BY purchase_date ASC + LIMIT 100 + ) AS allPurchaseAmounts, + + SUM(amount) OVER (PARTITION BY customer_id) AS totalSpent, + AVG(amount) OVER (PARTITION BY customer_id) AS avgPurchaseAmount, + COUNT(*) OVER (PARTITION BY customer_id) AS purchaseCount + + FROM purchases + WHERE purchase_date >= '2024-01-01' + ORDER BY customer_id, purchase_date +""" + +val patterns: Source[CustomerPurchasePattern, NotUsed] = + client.scrollAs[CustomerPurchasePattern](sqlQuery) +``` + +**Execution Strategy for Mixed Queries:** + +Since this query includes non-aggregated fields (`customerName`, `purchaseDate`, `amount`), the execution involves: + +1. **Window Functions Query**: Computes all `OVER` clause results grouped by `customer_id` +2. **Main Query**: Retrieves individual purchase records with fields +3. 
**In-Memory Join**: Each purchase record is enriched with window function values matching its `customer_id` + +#### **Example 2: Translation to Elasticsearch DSL** + +The SQL query above is decomposed into two Elasticsearch queries: + +**Query 1: Window Functions Aggregations** + +```json +{ + "query": { + "bool": { + "filter": [ + { + "range": { + "purchase_date": { + "gte": "2024-01-01" + } + } + } + ] + } + }, + "size": 0, + "_source": false, + "aggs": { + "customerId": { + "terms": { + "field": "customer_id", + "min_doc_count": 1 + }, + "aggs": { + "firstPurchaseAmount": { + "top_hits": { + "size": 1, + "sort": [ + { + "purchase_date": { + "order": "asc" + } + } + ], + "_source": { + "includes": [ + "amount" + ] + } + } + }, + "lastPurchaseAmount": { + "top_hits": { + "size": 1, + "sort": [ + { + "purchase_date": { + "order": "desc" + } + } + ], + "_source": { + "includes": [ + "amount" + ] + } + } + }, + "allPurchaseAmounts": { + "top_hits": { + "size": 100, + "sort": [ + { + "purchase_date": { + "order": "asc" + } + } + ], + "_source": { + "includes": [ + "amount" + ] + } + } + }, + "totalSpent": { + "sum": { + "field": "amount" + } + }, + "avgPurchaseAmount": { + "avg": { + "field": "amount" + } + }, + "purchaseCount": { + "value_count": { + "field": "_index" + } + } + } + } + } +} +``` + +**Query 2: Main Query (Non-Aggregated Fields)** + +Since this query includes non-aggregated fields, a separate search query would be executed: + +```json +{ + "query": { + "bool": { + "filter": [ + { + "range": { + "purchase_date": { + "gte": "2024-01-01" + } + } + } + ] + } + }, + "sort": [ + { + "customerId": { + "order": "asc" + } + }, + { + "purchaseDate": { + "order": "asc" + } + } + ], + "_source": { + "includes": [ + "customer_id", + "customer_name", + "purchase_date", + "amount" + ] + } +} +``` + +#### **Example 3: Time-Series Analysis with Window Functions** + +```scala +val sqlQuery = """ + SELECT + sensor_id AS sensorId, + timestamp, + temperature, + + -- 
Rolling statistics using window functions + AVG(temperature) OVER ( + PARTITION BY sensor_id + ORDER BY timestamp + ) AS movingAvg, + + MIN(temperature) OVER (PARTITION BY sensor_id) AS minTemp, + MAX(temperature) OVER (PARTITION BY sensor_id) AS maxTemp, + + FIRST_VALUE(temperature) OVER ( + PARTITION BY sensor_id + ORDER BY timestamp ASC + ) AS firstReading, + + LAST_VALUE(temperature) OVER ( + PARTITION BY sensor_id + ORDER BY timestamp ASC + ) AS currentReading + + FROM sensor_data + WHERE timestamp >= NOW() - INTERVAL 1 HOUR + ORDER BY sensor_id, timestamp +""" +``` + +#### **Performance Considerations** + +| Consideration | Recommendation | Impact | +|-------------------------|----------------------------------------------------------|-------------------| +| **ARRAY_AGG Size** | Use `LIMIT` in `OVER` clause to control array size | Memory usage | +| **Multiple Partitions** | Different partition keys require separate aggregations | Query complexity | +| **ORDER BY in OVER** | Adds sorting overhead; use only when necessary | Performance | +| **Large Result Sets** | Use `scrollAs[T]` instead of `searchAs[T]` for streaming | Memory efficiency | +| **In-Memory Join** | Partition keys should have reasonable cardinality | Memory & CPU | +| **Mixed Queries** | Non-aggregated fields require additional search query | Network & latency | + +#### **Best Practices** + +✅ **Minimize Partition Keys**: Use the smallest set of partition keys necessary +✅ **Reuse Partitions**: Group window functions with the same `PARTITION BY` clause +✅ **Limit Array Sizes**: Control `ARRAY_AGG` result size to prevent memory issues +✅ **Use Streaming**: Prefer `scrollAs[T]` for large datasets +✅ **Index Partition Fields**: Ensure partition key fields are indexed in Elasticsearch +✅ **Monitor Memory**: Track memory usage when joining large result sets + +#### **Limitations** + +⚠️ **ROWS/RANGE Frames**: Not yet supported +⚠️ **RANK/ROW_NUMBER**: Not yet supported +⚠️ **LEAD/LAG**: Not yet 
supported +⚠️ **NTILE**: Not yet supported +⚠️ **Partition Cardinality**: Very high cardinality partitions may impact performance + +📖 **[Full Window Functions Documentation](documentation/sql/functions_aggregate.md)** + +--- + +### **3.3. Compile-Time SQL Query Validation** SoftClient4ES provides **compile-time validation** for SQL queries used with type-safe methods like `searchAs[T]` and `scrollAs[T]`. This ensures that your queries are compatible with your Scala case classes **before your code even runs**, preventing runtime deserialization errors. diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 6f1a8fa0..d6c11fd8 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -350,207 +350,216 @@ object ElasticAggregation { nested: Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] ): Seq[Aggregation] = { + val trees = BucketTree(buckets.flatMap(_.headOption)) println( - s"[DEBUG] buildBuckets called with buckets: \n${BucketTree(buckets.flatMap(_.headOption))}" + s"[DEBUG] buildBuckets called with buckets: \n$trees" ) - buckets.flatMap { tree => - tree.reverse.foldLeft(Option.empty[Aggregation]) { (current, node) => - val currentBucketPath = node.bucketPath + for (tree <- buckets) yield { + val treeNodes = + tree.sortBy(_.level).reverse.foldLeft(Seq.empty[NodeAggregation]) { (current, node) => + val currentBucketPath = node.bucketPath - val bucket = node.bucket + val bucket = node.bucket - val aggregations = - aggs.filter(agg => agg.bucketPath == currentBucketPath).map(_.agg) + val aggregations = + aggs.filter(agg => agg.bucketPath == currentBucketPath).map(_.agg) - // Determine the nested path of the current bucket - val currentBucketNestedPath = bucket.identifier.path + // Determine 
the nested path of the current bucket + val currentBucketNestedPath = bucket.identifier.path - val aggScript = - if (!bucket.isBucketScript && bucket.shouldBeScripted) { - val context = PainlessContext() - val painless = bucket.painless(Some(context)) - Some(Script(s"$context$painless").lang("painless")) - } else { - None - } + val aggScript = + if (!bucket.isBucketScript && bucket.shouldBeScripted) { + val context = PainlessContext() + val painless = bucket.painless(Some(context)) + Some(Script(s"$context$painless").lang("painless")) + } else { + None + } - var agg: Aggregation = { - bucket.out match { - case _: SQLTemporal => - val functions = bucket.identifier.functions - val interval: Option[DateHistogramInterval] = - if (functions.size == 1) { - functions.head match { - case trunc: DateTrunc => - trunc.unit match { - case TimeUnit.YEARS => Option(DateHistogramInterval.Year) - case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) - case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) - case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) - case TimeUnit.DAYS => Option(DateHistogramInterval.Day) - case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) - case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) - case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) - case _ => None - } - case _ => None + var agg: Aggregation = { + bucket.out match { + case _: SQLTemporal => + val functions = bucket.identifier.functions + val interval: Option[DateHistogramInterval] = + if (functions.size == 1) { + functions.head match { + case trunc: DateTrunc => + trunc.unit match { + case TimeUnit.YEARS => Option(DateHistogramInterval.Year) + case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) + case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) + case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) + case TimeUnit.DAYS => Option(DateHistogramInterval.Day) + case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) 
+ case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) + case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) + case _ => None + } + case _ => None + } + } else { + None } - } else { - None + + aggScript match { + case Some(script) => + // Scripted date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .script(script) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .script(script) + .minDocCount(1) + } + case _ => + // Standard date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .field(currentBucketNestedPath) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, calendarInterval = interval) + .field(currentBucketNestedPath) + .minDocCount(1) + } } - aggScript match { - case Some(script) => - // Scripted date histogram - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .script(script) - .minDocCount(1) - .order(direction match { - case Asc => HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + case _ => + aggScript match { + case Some(script) => + // Scripted terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", 
asc = false) + })) + case _ => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + } + case _ => + // Standard terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + termsAgg(bucket.name, currentBucketNestedPath) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + termsAgg(bucket.name, currentBucketNestedPath) + .minDocCount(1) + } + } + } + } + agg match { + case termsAgg: TermsAggregation => + bucket.size.foreach(s => agg = termsAgg.size(s)) + having match { + case Some(criteria) => + criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.includeRegex(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.includeExactValues(values.toArray) case _ => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .script(script) - .minDocCount(1) } - case _ => - // Standard date histogram - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .field(currentBucketNestedPath) - .minDocCount(1) - .order(direction match { - case Asc => HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.excludeRegex(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.excludeExactValues(values.toArray) case _ => - DateHistogramAggregation(bucket.name, calendarInterval = interval) - .field(currentBucketNestedPath) - .minDocCount(1) } + case _ => } - case _ => - aggScript match { - case Some(script) => - // Scripted terms aggregation - 
bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - TermsAggregation(bucket.name) - .script(script) - .minDocCount(1) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case _ => - TermsAggregation(bucket.name) - .script(script) - .minDocCount(1) - } + } + current match { + case nodes if nodes.nonEmpty => + val childNodes = + nodes.filter(_.node.parentBucketPath.getOrElse("") == node.bucketPath) + agg match { + case termsAgg: TermsAggregation => + agg = termsAgg.subaggs(aggregations ++ childNodes.map(_.agg)) + case dateHistogramAgg: DateHistogramAggregation => + agg = dateHistogramAgg.subaggs(aggregations ++ childNodes.map(_.agg)) case _ => - // Standard terms aggregation - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - termsAgg(bucket.name, currentBucketNestedPath) - .minDocCount(1) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case _ => - termsAgg(bucket.name, currentBucketNestedPath) - .minDocCount(1) - } } - } - } - agg match { - case termsAgg: TermsAggregation => - bucket.size.foreach(s => agg = termsAgg.size(s)) - having match { - case Some(criteria) => - criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = termsAgg.includeRegex(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = termsAgg.includeExactValues(values.toArray) - case _ => - } - criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = termsAgg.excludeRegex(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = termsAgg.excludeExactValues(values.toArray) - case _ => - } - case _ => - } - case _ => - } - current match { - case 
Some(subAgg) => - agg match { - case termsAgg: TermsAggregation => - agg = termsAgg.subaggs(aggregations :+ subAgg) - case dateHistogramAgg: DateHistogramAggregation => - agg = dateHistogramAgg.subaggs(aggregations :+ subAgg) - case _ => - } - Some(agg) - case None => - val subaggs = - having match { - case Some(criteria) => - val script = metricSelectorForBucket( - criteria, - nested, - allElasticAggregations - ) - - if (script.nonEmpty) { - val bucketSelector = - bucketSelectorAggregation( - "having_filter", - Script(script), - extractMetricsPathForBucket( - criteria, - nested, - allElasticAggregations + NodeAggregation(node, agg) +: nodes + case Nil => + val subaggs = + having match { + case Some(criteria) => + val script = metricSelectorForBucket( + criteria, + nested, + allElasticAggregations + ) + + if (script.nonEmpty) { + val bucketSelector = + bucketSelectorAggregation( + "having_filter", + Script(script), + extractMetricsPathForBucket( + criteria, + nested, + allElasticAggregations + ) ) - ) - aggregations :+ bucketSelector - } else { + aggregations :+ bucketSelector + } else { + aggregations + } + case None => aggregations - } - case None => - aggregations + } + + agg match { + case termsAgg: TermsAggregation => + val aggregationsWithOrder: Seq[TermsOrder] = + aggregationsDirection.toSeq.map { kv => + kv._2 match { + case Asc => TermsOrder(kv._1, asc = true) + case _ => TermsOrder(kv._1, asc = false) + } + } + if (aggregationsWithOrder.nonEmpty) + agg = termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) + else + agg = termsAgg.copy(subaggs = subaggs) + case dateHistogramAggregation: DateHistogramAggregation => + agg = dateHistogramAggregation.copy(subaggs = subaggs) } - agg match { - case termsAgg: TermsAggregation => - val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => - kv._2 match { - case Asc => TermsOrder(kv._1, asc = true) - case _ => TermsOrder(kv._1, asc = false) - } - } - if 
(aggregationsWithOrder.nonEmpty) - agg = termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) - else - agg = termsAgg.copy(subaggs = subaggs) - case dateHistogramAggregation: DateHistogramAggregation => - agg = dateHistogramAggregation.copy(subaggs = subaggs) - } - Some(agg) + Seq(NodeAggregation(node, agg)) + } } - } + + treeNodes.headOption.map(_.agg) + } - } + }.flatten /** Generates the bucket_selector script for a given bucket */ @@ -781,3 +790,5 @@ object ElasticAggregation { result } } + +private case class NodeAggregation(node: BucketNode, agg: Aggregation) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala index 6d738565..3a2b84cd 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala @@ -122,7 +122,7 @@ trait ScrollApi extends ElasticClientHelpers { )(implicit system: ActorSystem): Source[(Map[String, Any], ScrollMetrics), NotUsed] = { sql.request match { case Some(Left(single)) => - if (single.windowFunctions.nonEmpty) + if (single.windowFunctions.nonEmpty && single.fields.nonEmpty) return scrollWithWindowEnrichment(sql, single, config) val sqlRequest = single.copy(score = sql.score) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala index afd3c30b..964de949 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -68,7 +68,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { collection.immutable.Seq(single.sources: _*), sql = Some(sql.query) ) - if (single.windowFunctions.exists(_.isWindowing)) + if (single.windowFunctions.exists(_.isWindowing) && single.fields.nonEmpty) searchWithWindowEnrichment(sql, single) else 
singleSearch(elasticQuery, single.fieldAliases, single.sqlAggregations) diff --git a/documentation/sql/functions_aggregate.md b/documentation/sql/functions_aggregate.md index aa755dfe..c23f8e07 100644 --- a/documentation/sql/functions_aggregate.md +++ b/documentation/sql/functions_aggregate.md @@ -43,6 +43,7 @@ Count rows or non-null expressions. With `DISTINCT` counts distinct values. COUNT(*) COUNT(expr) COUNT(DISTINCT expr) +COUNT(*) OVER (PARTITION BY partition_expr, ...) --(since v0.14.0) ``` **Inputs:** @@ -195,6 +196,7 @@ Sum of values. ```sql SUM(expr) SUM(DISTINCT expr) +SUM(expr) OVER (PARTITION BY partition_expr, ...) --(since v0.14.0) ``` **Inputs:** @@ -342,6 +344,7 @@ Average of values. ```sql AVG(expr) AVG(DISTINCT expr) +AVG(expr) OVER (PARTITION BY partition_expr, ...) --(since v0.14.0) ``` **Inputs:** @@ -491,6 +494,7 @@ Minimum value in group. **Syntax:** ```sql MIN(expr) +MIN(expr) OVER (PARTITION BY partition_expr, ...) --(since v0.14.0) ``` **Inputs:** @@ -618,6 +622,7 @@ Maximum value in group. **Syntax:** ```sql MAX(expr) +MAX(expr) OVER (PARTITION BY partition_expr, ...) 
--(since v0.14.0) ``` **Inputs:** @@ -1075,9 +1080,9 @@ SELECT ARRAY_AGG(name) OVER ( PARTITION BY department ORDER BY hire_date ASC + LIMIT 100 ) AS employees -FROM emp -LIMIT 100; +FROM emp; -- Result: employees as an array of name values per department (sorted and limited) ``` @@ -1192,27 +1197,9 @@ SELECT ARRAY_AGG(name) OVER ( PARTITION BY department ORDER BY hire_date ASC + LIMIT 100 -- Important: limits result set size ) AS employees -FROM emp -LIMIT 100; -- Important: limits result set size -``` - -**Comparison with STRING_AGG (if available):** -```sql --- ARRAY_AGG returns array -SELECT - department, - ARRAY_AGG(name) OVER (PARTITION BY department) AS name_array FROM emp; --- Result: ['John', 'Jane', 'Bob'] - --- STRING_AGG returns string (if supported) -SELECT - department, - STRING_AGG(name, ', ') AS name_string -FROM emp -GROUP BY department; --- Result: 'John, Jane, Bob' ``` --- diff --git a/documentation/sql/keywords.md b/documentation/sql/keywords.md index 798ee8bb..986848de 100644 --- a/documentation/sql/keywords.md +++ b/documentation/sql/keywords.md @@ -14,6 +14,7 @@ GROUP BY HAVING ORDER BY OFFSET +LIMIT ## Aliases and type conversion AS @@ -31,6 +32,7 @@ AVG MIN MAX OVER +PARTITION BY FIRST_VALUE LAST_VALUE ARRAY_AGG @@ -87,22 +89,23 @@ ISNOTNULL NULLIF ## Date/Time/Datetime/Timestamp functions -YEAR -QUARTER -MONTH -WEEK -DAY -HOUR -MINUTE -SECOND -MILLISECOND -MICROSECOND -NANOSECOND -EPOCHDAY -OFFSET_SECONDS -LAST_DAY -WEEKDAY -YEARDAY +[//]: # (YEAR ) +[//]: # (QUARTER ) +[//]: # (MONTH ) +[//]: # (WEEK ) +[//]: # (DAY ) +[//]: # (HOUR ) +[//]: # (MINUTE ) +[//]: # (SECOND ) +[//]: # (MILLISECOND ) +[//]: # (MICROSECOND ) +[//]: # (NANOSECOND ) +[//]: # (EPOCHDAY ) +[//]: # (OFFSET_SECONDS ) +[//]: # (LAST_DAY ) +[//]: # (LASTDAY) +[//]: # (WEEKDAY ) +[//]: # (YEARDAY ) INTERVAL CURRENT_DATE CURDATE diff --git a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala 
b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 040cf0c8..d76b14a5 100644 --- a/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -350,207 +350,216 @@ object ElasticAggregation { nested: Option[NestedElement], allElasticAggregations: Seq[ElasticAggregation] ): Seq[Aggregation] = { + val trees = BucketTree(buckets.flatMap(_.headOption)) println( - s"[DEBUG] buildBuckets called with buckets: \n${BucketTree(buckets.flatMap(_.headOption))}" + s"[DEBUG] buildBuckets called with buckets: \n$trees" ) - buckets.flatMap { tree => - tree.reverse.foldLeft(Option.empty[Aggregation]) { (current, node) => - val currentBucketPath = node.bucketPath + for (tree <- buckets) yield { + val treeNodes = + tree.sortBy(_.level).reverse.foldLeft(Seq.empty[NodeAggregation]) { (current, node) => + val currentBucketPath = node.bucketPath - val bucket = node.bucket + val bucket = node.bucket - val aggregations = - aggs.filter(agg => agg.bucketPath == currentBucketPath).map(_.agg) + val aggregations = + aggs.filter(agg => agg.bucketPath == currentBucketPath).map(_.agg) - // Determine the nested path of the current bucket - val currentBucketNestedPath = bucket.identifier.path + // Determine the nested path of the current bucket + val currentBucketNestedPath = bucket.identifier.path - val aggScript = - if (!bucket.isBucketScript && bucket.shouldBeScripted) { - val context = PainlessContext() - val painless = bucket.painless(Some(context)) - Some(Script(s"$context$painless").lang("painless")) - } else { - None - } + val aggScript = + if (!bucket.isBucketScript && bucket.shouldBeScripted) { + val context = PainlessContext() + val painless = bucket.painless(Some(context)) + Some(Script(s"$context$painless").lang("painless")) + } else { + None + } - var agg: Aggregation = { - bucket.out match { - case _: SQLTemporal => - val 
functions = bucket.identifier.functions - val interval: Option[DateHistogramInterval] = - if (functions.size == 1) { - functions.head match { - case trunc: DateTrunc => - trunc.unit match { - case TimeUnit.YEARS => Option(DateHistogramInterval.Year) - case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) - case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) - case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) - case TimeUnit.DAYS => Option(DateHistogramInterval.Day) - case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) - case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) - case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) - case _ => None - } - case _ => None + var agg: Aggregation = { + bucket.out match { + case _: SQLTemporal => + val functions = bucket.identifier.functions + val interval: Option[DateHistogramInterval] = + if (functions.size == 1) { + functions.head match { + case trunc: DateTrunc => + trunc.unit match { + case TimeUnit.YEARS => Option(DateHistogramInterval.Year) + case TimeUnit.QUARTERS => Option(DateHistogramInterval.Quarter) + case TimeUnit.MONTHS => Option(DateHistogramInterval.Month) + case TimeUnit.WEEKS => Option(DateHistogramInterval.Week) + case TimeUnit.DAYS => Option(DateHistogramInterval.Day) + case TimeUnit.HOURS => Option(DateHistogramInterval.Hour) + case TimeUnit.MINUTES => Option(DateHistogramInterval.Minute) + case TimeUnit.SECONDS => Option(DateHistogramInterval.Second) + case _ => None + } + case _ => None + } + } else { + None } - } else { - None + + aggScript match { + case Some(script) => + // Scripted date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, interval = interval) + .script(script) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + 
DateHistogramAggregation(bucket.name, interval = interval) + .script(script) + .minDocCount(1) + } + case _ => + // Standard date histogram + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + DateHistogramAggregation(bucket.name, interval = interval) + .field(currentBucketNestedPath) + .minDocCount(1) + .order(direction match { + case Asc => HistogramOrder("_key", asc = true) + case _ => HistogramOrder("_key", asc = false) + }) + case _ => + DateHistogramAggregation(bucket.name, interval = interval) + .field(currentBucketNestedPath) + .minDocCount(1) + } } - aggScript match { - case Some(script) => - // Scripted date histogram - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, interval = interval) - .script(script) - .minDocCount(1) - .order(direction match { - case Asc => HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + case _ => + aggScript match { + case Some(script) => + // Scripted terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + TermsAggregation(bucket.name) + .script(script) + .minDocCount(1) + } + case _ => + // Standard terms aggregation + bucketsDirection.get(bucket.identifier.identifierName) match { + case Some(direction) => + termsAgg(bucket.name, currentBucketNestedPath) + .minDocCount(1) + .order(Seq(direction match { + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) + })) + case _ => + termsAgg(bucket.name, currentBucketNestedPath) + .minDocCount(1) + } + } + } + } + agg match { + case termsAgg: TermsAggregation => + bucket.size.foreach(s => agg = termsAgg.size(s)) + having match { + case 
Some(criteria) => + criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.include(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.include(values.toArray) case _ => - DateHistogramAggregation(bucket.name, interval = interval) - .script(script) - .minDocCount(1) } - case _ => - // Standard date histogram - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - DateHistogramAggregation(bucket.name, interval = interval) - .field(currentBucketNestedPath) - .minDocCount(1) - .order(direction match { - case Asc => HistogramOrder("_key", asc = true) - case _ => HistogramOrder("_key", asc = false) - }) + criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = termsAgg.exclude(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = termsAgg.exclude(values.toArray) case _ => - DateHistogramAggregation(bucket.name, interval = interval) - .field(currentBucketNestedPath) - .minDocCount(1) } + case _ => } - case _ => - aggScript match { - case Some(script) => - // Scripted terms aggregation - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - TermsAggregation(bucket.name) - .script(script) - .minDocCount(1) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case _ => - TermsAggregation(bucket.name) - .script(script) - .minDocCount(1) - } + } + current match { + case nodes if nodes.nonEmpty => + val childNodes = + nodes.filter(_.node.parentBucketPath.getOrElse("") == node.bucketPath) + agg match { + case termsAgg: TermsAggregation => + agg = termsAgg.subaggs(aggregations ++ childNodes.map(_.agg)) + case dateHistogramAgg: DateHistogramAggregation => + agg = 
dateHistogramAgg.subaggs(aggregations ++ childNodes.map(_.agg)) case _ => - // Standard terms aggregation - bucketsDirection.get(bucket.identifier.identifierName) match { - case Some(direction) => - termsAgg(bucket.name, currentBucketNestedPath) - .minDocCount(1) - .order(Seq(direction match { - case Asc => TermsOrder("_key", asc = true) - case _ => TermsOrder("_key", asc = false) - })) - case _ => - termsAgg(bucket.name, currentBucketNestedPath) - .minDocCount(1) - } } - } - } - agg match { - case termsAgg: TermsAggregation => - bucket.size.foreach(s => agg = termsAgg.size(s)) - having match { - case Some(criteria) => - criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = termsAgg.include(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = termsAgg.include(values.toArray) - case _ => - } - criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { - case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => - agg = termsAgg.exclude(regex) - case BucketIncludesExcludes(values, _) if values.nonEmpty => - agg = termsAgg.exclude(values.toArray) - case _ => - } - case _ => - } - case _ => - } - current match { - case Some(subAgg) => - agg match { - case termsAgg: TermsAggregation => - agg = termsAgg.subaggs(aggregations :+ subAgg) - case dateHistogramAgg: DateHistogramAggregation => - agg = dateHistogramAgg.subaggs(aggregations :+ subAgg) - case _ => - } - Some(agg) - case None => - val subaggs = - having match { - case Some(criteria) => - val script = metricSelectorForBucket( - criteria, - nested, - allElasticAggregations - ) - - if (script.nonEmpty) { - val bucketSelector = - bucketSelectorAggregation( - "having_filter", - Script(script), - extractMetricsPathForBucket( - criteria, - nested, - allElasticAggregations + NodeAggregation(node, agg) +: nodes + case Nil => + val subaggs = + having match { + case Some(criteria) => + val 
script = metricSelectorForBucket( + criteria, + nested, + allElasticAggregations + ) + + if (script.nonEmpty) { + val bucketSelector = + bucketSelectorAggregation( + "having_filter", + Script(script), + extractMetricsPathForBucket( + criteria, + nested, + allElasticAggregations + ) ) - ) - aggregations :+ bucketSelector - } else { + aggregations :+ bucketSelector + } else { + aggregations + } + case None => aggregations - } - case None => - aggregations + } + + agg match { + case termsAgg: TermsAggregation => + val aggregationsWithOrder: Seq[TermsOrder] = + aggregationsDirection.toSeq.map { kv => + kv._2 match { + case Asc => TermsOrder(kv._1, asc = true) + case _ => TermsOrder(kv._1, asc = false) + } + } + if (aggregationsWithOrder.nonEmpty) + agg = termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) + else + agg = termsAgg.copy(subaggs = subaggs) + case dateHistogramAggregation: DateHistogramAggregation => + agg = dateHistogramAggregation.copy(subaggs = subaggs) } - agg match { - case termsAgg: TermsAggregation => - val aggregationsWithOrder: Seq[TermsOrder] = aggregationsDirection.toSeq.map { kv => - kv._2 match { - case Asc => TermsOrder(kv._1, asc = true) - case _ => TermsOrder(kv._1, asc = false) - } - } - if (aggregationsWithOrder.nonEmpty) - agg = termsAgg.order(aggregationsWithOrder).copy(subaggs = subaggs) - else - agg = termsAgg.copy(subaggs = subaggs) - case dateHistogramAggregation: DateHistogramAggregation => - agg = dateHistogramAggregation.copy(subaggs = subaggs) - } - Some(agg) + Seq(NodeAggregation(node, agg)) + } } - } + + treeNodes.headOption.map(_.agg) + } - } + }.flatten /** Generates the bucket_selector script for a given bucket */ @@ -781,3 +790,5 @@ object ElasticAggregation { result } } + +private case class NodeAggregation(node: BucketNode, agg: Aggregation) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala index 443314d2..db274a33 
100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala @@ -68,7 +68,9 @@ object Parser def apply( query: String ): Either[ParserError, Either[SQLSearchRequest, SQLMultiSearchRequest]] = { - val reader = new PackratReader(new CharSequenceReader(query)) + val normalizedQuery = + query.split("\n").map(_.trim).filterNot(_.isEmpty).filterNot(_.startsWith("--")).mkString(" ") + val reader = new PackratReader(new CharSequenceReader(normalizedQuery)) parse(requests, reader) match { case NoSuccess(msg, _) => Console.err.println(msg) @@ -145,6 +147,7 @@ trait Parser "current_datetime", "current_timestamp", "now", + "today", "coalesce", "nullif", "isnull", @@ -161,33 +164,33 @@ trait Parser "datetime_add", "datetime_sub", "interval", - "year", - "month", - "day", - "hour", - "minute", - "second", - "quarter", - "char", - "string", - "byte", - "tinyint", - "short", - "smallint", - "int", - "integer", - "long", - "bigint", - "real", - "float", - "double", +// "year", +// "month", +// "day", +// "hour", +// "minute", +// "second", +// "quarter", +// "char", +// "string", +// "byte", +// "tinyint", +// "short", +// "smallint", +// "int", +// "integer", +// "long", +// "bigint", +// "real", +// "float", +// "double", "pi", - "boolean", +// "boolean", "distance", - "time", - "date", - "datetime", - "timestamp", +// "time", +// "date", +// "datetime", +// "timestamp", "and", "or", "not", @@ -245,10 +248,10 @@ trait Parser "last", "array_agg", "first_value", - "last_value" -// "ltrim", -// "rtrim", -// "replace", + "last_value", + "ltrim", + "rtrim", + "replace" ) private val identifierRegexStr = diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala index 75de263c..d86f5582 100644 --- 
a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala @@ -52,18 +52,18 @@ package object aggregate { def partition_by: PackratParser[Seq[Identifier]] = PARTITION_BY.regex ~> rep1sep(identifierWithTransformation | identifier, separator) - private[this] def over: Parser[(Seq[Identifier], Option[OrderBy])] = - OVER.regex ~> start ~ partition_by.? ~ orderBy.? <~ end ^^ { case _ ~ pb ~ ob => - (pb.getOrElse(Seq.empty), ob) + private[this] def over: Parser[(Seq[Identifier], Option[OrderBy], Option[Limit])] = + OVER.regex ~> start ~ partition_by.? ~ orderBy.? ~ limit.? <~ end ^^ { case _ ~ pb ~ ob ~ l => + (pb.getOrElse(Seq.empty), ob, l) } private[this] def window_function( windowId: PackratParser[Identifier] = identifier - ): PackratParser[(Identifier, Seq[Identifier], Option[OrderBy])] = + ): PackratParser[(Identifier, Seq[Identifier], Option[OrderBy], Option[Limit])] = start ~ windowId ~ end ~ over.? 
^^ { case _ ~ id ~ _ ~ o => o match { - case Some((pb, ob)) => (id, pb, ob) - case None => (id, Seq.empty, None) + case Some((pb, ob, l)) => (id, pb, ob, l) + case None => (id, Seq.empty, None, None) } } @@ -91,7 +91,7 @@ package object aggregate { top._1, top._2, top._3.orElse(Option(OrderBy(Seq(FieldSort(top._1, order = None))))), - limit = None + limit = top._4 ) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala index df04b664..8eb01687 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala @@ -199,6 +199,13 @@ case class BucketNode( def depth: Int = 1 + (if (children.isEmpty) 0 else children.map(_.depth).max) + def level: Int = { + parent match { + case Some(p) => 1 + p.level + case None => 0 + } + } + // Check if the node is a leaf def isLeaf: Boolean = children.isEmpty @@ -209,6 +216,10 @@ case class BucketNode( } } + def parentBucketPath: Option[String] = { + parent.map(_.bucketPath) + } + def root: BucketNode = { parent match { case Some(p) => p.root From 5515d6a8b8db2b31ee5610a4e1eec7cd671eb6ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Wed, 3 Dec 2025 11:31:13 +0100 Subject: [PATCH 38/40] to fix regression with window functions --- .../main/scala/app/softnetwork/elastic/client/ScrollApi.scala | 2 +- .../main/scala/app/softnetwork/elastic/client/SearchApi.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala index 3a2b84cd..6d738565 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala @@ -122,7 +122,7 @@ trait ScrollApi extends ElasticClientHelpers { )(implicit system: 
ActorSystem): Source[(Map[String, Any], ScrollMetrics), NotUsed] = { sql.request match { case Some(Left(single)) => - if (single.windowFunctions.nonEmpty && single.fields.nonEmpty) + if (single.windowFunctions.nonEmpty) return scrollWithWindowEnrichment(sql, single, config) val sqlRequest = single.copy(score = sql.score) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala index 964de949..947ae75f 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -68,7 +68,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { collection.immutable.Seq(single.sources: _*), sql = Some(sql.query) ) - if (single.windowFunctions.exists(_.isWindowing) && single.fields.nonEmpty) + if (single.windowFunctions.exists(_.isWindowing) && single.groupBy.isEmpty) searchWithWindowEnrichment(sql, single) else singleSearch(elasticQuery, single.fieldAliases, single.sqlAggregations) From dc226a721ac6e0d9567b58db395a3961308e45ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Wed, 3 Dec 2025 12:01:46 +0100 Subject: [PATCH 39/40] update README.md --- README.md | 147 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 135 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index f75885c5..1ea50a39 100644 --- a/README.md +++ b/README.md @@ -470,7 +470,9 @@ case class ProductSalesAnalysis( uniqueCustomers: Long // COUNT DISTINCT OVER ) -val sqlQuery = """ +// Type-safe execution with compile-time validation +val results: Source[ProductSalesAnalysis, NotUsed] = + client.scrollAs[ProductSalesAnalysis](""" SELECT product_id AS productId, product_name AS productName, @@ -505,23 +507,19 @@ val sqlQuery = """ WHERE sale_date >= '2024-01-01' GROUP BY product_id, product_name, DATE_TRUNC('month', sale_date) ORDER BY product_id, saleMonth 
-""" - -// Type-safe execution with compile-time validation -val results: Source[ProductSalesAnalysis, NotUsed] = - client.scrollAs[ProductSalesAnalysis](sqlQuery) +""") results.runWith(Sink.foreach { analysis => println(s""" Product: ${analysis.productName} (${analysis.productId}) Month: ${analysis.saleMonth} - Monthly Sales: $${analysis.monthlySales} - First Sale: $${analysis.firstSaleAmount} - Last Sale: $${analysis.lastSaleAmount} + Monthly Sales: ${analysis.monthlySales} + First Sale: ${analysis.firstSaleAmount} + Last Sale: ${analysis.lastSaleAmount} All Sales: ${analysis.allSaleAmounts.mkString("[", ", ", "]")} - Total Sales (All Time): $${analysis.totalSales} - Average Sale: $${analysis.avgSaleAmount} - Price Range: $${analysis.minSaleAmount} - $${analysis.maxSaleAmount} + Total Sales (All Time): ${analysis.totalSales} + Average Sale: ${analysis.avgSaleAmount} + Price Range: ${analysis.minSaleAmount} - ${analysis.maxSaleAmount} Sale Count: ${analysis.saleCount} Unique Customers: ${analysis.uniqueCustomers} """) @@ -917,6 +915,131 @@ val sqlQuery = """ """ ``` +#### **Example 3: Translation to Elasticsearch DSL** + +The SQL query above is decomposed into two Elasticsearch queries: + +**Query 1: Window Functions Aggregations** + +```json +{ + "query": { + "bool": { + "filter": [ + { + "range": { + "timestamp": { + "gte": "now-1H" + } + } + } + ] + } + }, + "size": 0, + "_source": false, + "aggs": { + "sensorId": { + "terms": { + "field": "sensor_id", + "min_doc_count": 1 + }, + "aggs": { + "movingAvg": { + "avg": { + "field": "temperature" + } + }, + "minTemp": { + "min": { + "field": "temperature" + } + }, + "maxTemp": { + "max": { + "field": "temperature" + } + }, + "firstReading": { + "top_hits": { + "size": 1, + "sort": [ + { + "timestamp": { + "order": "asc" + } + } + ], + "_source": { + "includes": [ + "temperature" + ] + } + } + }, + "currentReading": { + "top_hits": { + "size": 1, + "sort": [ + { + "timestamp": { + "order": "desc" + } + } + ], + 
"_source": { + "includes": [ + "temperature" + ] + } + } + } + } + } + } +} +``` +**Query 2: Main Query (Non-Aggregated Fields)** + +Since this query includes non-aggregated fields, a separate search query would be executed: + +```json +{ + "query": { + "bool": { + "filter": [ + { + "range": { + "timestamp": { + "gte": "now-1H" + } + } + } + ] + } + }, + "sort": [ + { + "sensorId": { + "order": "asc" + } + }, + { + "timestamp": { + "order": "asc" + } + } + ], + "_source": { + "includes": [ + "sensor_id", + "timestamp", + "temperature" + ] + } +} +``` + #### **Performance Considerations** | Consideration | Recommendation | Impact | From ff1349c1168268b699d7fb9aa1f7e809e2416176 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Wed, 3 Dec 2025 12:09:40 +0100 Subject: [PATCH 40/40] update query normalization before parsing it --- .../scala/app/softnetwork/elastic/sql/parser/Parser.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala index db274a33..ec798f2a 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala @@ -69,7 +69,11 @@ object Parser query: String ): Either[ParserError, Either[SQLSearchRequest, SQLMultiSearchRequest]] = { val normalizedQuery = - query.split("\n").map(_.trim).filterNot(_.isEmpty).filterNot(_.startsWith("--")).mkString(" ") + query + .split("\n") + .map(_.split("--")(0).trim) + .filterNot(w => w.isEmpty || w.startsWith("--")) + .mkString(" ") val reader = new PackratReader(new CharSequenceReader(normalizedQuery)) parse(requests, reader) match { case NoSuccess(msg, _) =>